---
title: "Replication code for Bridging the Divide: Social Capital’s Mediating Impact on Partisan Polarization and Health"
author: "Timothy Fraser and colleagues."
date: "Sept. 10, 2020"
output: html_notebook
---

This RMarkdown document outlines the code necessary to compare our survey results with county-level results from the BRFSS and to develop a set of models for the effects of social capital and polarization on health.

# 0. Packages

```{r, message=FALSE, warning = FALSE}
library(tidyverse)
library(survey)
select <- dplyr::select
```

# 1. Survey Data

First, we're going to take the raw survey results and convert them into a working dataset.

## 1.1 Filter to Valid Responses

Let's keep only the responses recorded on or after December 23, 2019, since the edits to the survey's question wordings were in place from that date onward.

```{r}

# Build the working set of valid responses from the raw survey export and
# cache it as "survey_builder/responses.rds" for the later chunks.
# The first two rows of the export are dropped here; they are the
# question-header rows, not responses.
read_csv("survey_builder/polarization_and_health_survey_jan_6_2020.csv")[-c(1:2), ] %>%
  rename(date = StartDate, complete = `good complete`) %>%
  # Split the "month/day/year hour:min" start date into its components
  mutate(
    year = date %>% word(3, sep = "/") %>% word(1, sep = " "),
    month = date %>% word(1, sep = "/"),
    day = date %>% word(2, sep = "/"),
    hour = date %>% word(2, sep = " ") %>% word(1, sep = ":"),
    min = date %>% word(2, sep = " ") %>% word(2, sep = ":")
  ) %>%
  # Pass the conversion function directly; funs() is deprecated in dplyr
  mutate_at(vars(year, month, day, hour, min), as.numeric) %>%
  # Reassemble the components into a date-time (make_datetime() defaults to UTC)
  mutate(date = lubridate::make_datetime(
    year = year, month = month, day = day, hour = hour, min = min
  )) %>%
  # Keep completed responses from after the edits to question wordings.
  # The cutoff is an explicit UTC date-time: a bare "2019-12-23" string would
  # be parsed in the local time zone of whichever machine runs this code,
  # which shifts the cutoff by the local UTC offset.
  filter(date >= as.POSIXct("2019-12-23", tz = "UTC"), complete == 1) %>%
  # Export to file
  saveRDS("survey_builder/responses.rds")
```

## 1.2 Grab respondent geographic identifiers.

```{r, message = FALSE}
# Load the cached valid responses and give the respondent identifier and the
# geographic fields short names.
dat <- rename(
  read_rds("survey_builder/responses.rds"),
  id = ResponseId,
  zip = GEO_ZIP_1_1,
  state = GEO_STATE_1,
  county = GEO_COUNTY
)

# Question-header lookup table: take the first two rows of the raw export and
# transpose them, so that each survey column becomes one row with its header
# name and the two header rows alongside it.
vars <- read_csv("survey_builder/polarization_and_health_survey_jan_6_2020.csv")[1:2, ] %>%
  t() %>%
  as.data.frame() %>%
  tibble::rownames_to_column(var = "header") %>%
  magrittr::set_colnames(c("header", "header_complete", "question_text"))

```

## 1.3 Calculating the CESDR-7

```{r cesdr7, message = FALSE}

# Score a single CESD-R-7 item: each response label is worth 0 to 3 points,
# from 0 (rarely or none of the time) up to 3 (most or all of the time).
recode_CESDR7 <- function(data) {
  dplyr::recode(
    data,
    "Rarely or none of the time (1 day)" = 0,
    "Some or a little of the time (1-2 days)" = 1,
    "Occasionally or a moderate amount of time (3-4 days)" = 2,
    "Most or all of the time (5-7 days)" = 3
  )
}

dat <- dat %>%
  # Score each of the seven items in the battery
  mutate_at(paste0("CESDR7_", 1:7), recode_CESDR7) %>%
  # Add the seven item scores into the 0-21 scale of mental/emotional
  # distress, then stretch it to 0-60 so that each observation is comparable
  # with past CESD-R 10 results (which are weighted to the 0-60 range of the
  # CESD-R 20).
  mutate(
    cesdr7 = (CESDR7_1 + CESDR7_2 + CESDR7_3 + CESDR7_4 +
      CESDR7_5 + CESDR7_6 + CESDR7_7) * 60 / 21
  ) %>%
  # The item-level scores are no longer needed
  select(-c(
    CESDR7_1, CESDR7_2, CESDR7_3, CESDR7_4,
    CESDR7_5, CESDR7_6, CESDR7_7
  ))

# The scoring helper is only needed in this chunk
remove(recode_CESDR7)
# Note: The CESDR7 (0-60) appears to have zero-inflation,
# because many people who do not experience any elevated
# mental/emotional distress reported 0.
```


## 1.4 Calculate Self-Reported Health Measures

```{r, message = FALSE}
# Rename the self-reported health items and convert their response labels to
# numeric scales.

dat <- dat %>%
  # Give the health items descriptive names
  rename(
    health_2020 = HEALTH1,
    health_chg_2016_2020 = HEALTH2016,
    days_poor_physical_health = HEALTH2,
    days_poor_mental_health = Q118,
    happiness_politics_in_news = NEWSA
  ) %>%
  # Self-rated health in the present: 1 (Poor) to 5 (Excellent)
  mutate(health_2020 = dplyr::recode(
    health_2020,
    "Poor" = 1,
    "Fair" = 2,
    "Good" = 3,
    "Very Good" = 4,
    "Excellent" = 5
  )) %>%
  # Self-reported change in health since 2016: -2 (much worse) to 2 (much
  # better). "Better now" is +1 so that the scale is symmetric around
  # "No change"; coding it as -1 would make it indistinguishable from
  # "Worse now".
  mutate(health_chg_2016_2020 = dplyr::recode(
    health_chg_2016_2020,
    "Much worse now" = -2,
    "Worse now" = -1,
    "No change" = 0,
    "Better now" = 1,
    "Much better now" = 2
  )) %>%
  # Number of days in a month the respondent reported poor physical or
  # mental health, converted to numeric
  mutate(
    days_poor_physical_health = as.numeric(days_poor_physical_health),
    days_poor_mental_health = as.numeric(days_poor_mental_health)
  ) %>%
  # Happiness level when reading news about politics:
  # -2 (very unhappy) to 2 (very happy)
  mutate(happiness_politics_in_news = dplyr::recode(
    happiness_politics_in_news,
    "very unhappy" = -2,
    "somewhat unhappy" = -1,
    "neither happy nor unhappy" = 0,
    "somewhat happy" = 1,
    "very happy" = 2
  ))
```


## 1.5 Calculate Height, Weight, and BMI in the present and 2015

```{r, message = FALSE, warning = FALSE}

# Body mass index (kg / m^2) from a weight in pounds and a height given as
# feet plus inches.
#   weight_lb: numeric weight in pounds
#   feet:      numeric feet component of height
#   inches:    numeric inches component of height; a missing value is
#              treated as 0 inches
# Returns NA wherever weight or feet is missing.
compute_bmi <- function(weight_lb, feet, inches) {
  # 0.3048 metres per foot; 0.453592 kilograms per pound
  height_m <- (feet + coalesce(inches, 0) / 12) * 0.3048
  weight_lb * 0.453592 / height_m^2
}

dat <- dat %>%
  # Give the height and weight items descriptive names
  rename(
    height_feet = HEIGHT_1_1,
    height_inches = HEIGHT_1_2,
    weight = WEIGHT,
    weight_2015 = WEIGHT2015
  ) %>%
  # Convert to numeric; entries that are not numbers become NA
  mutate_at(
    c("height_feet", "height_inches", "weight", "weight_2015"),
    as.numeric
  ) %>%
  # Keep a response only if it is plausible; otherwise set it to NA
  mutate(
    # Feet must be above 2 and below 9
    # (the shortest person in the world was 2'11"; the tallest 8'11")
    height_feet = if_else(
      height_feet > 2 & height_feet < 9, height_feet, NA_real_
    ),
    # Inches must be 0 or greater but less than 12
    height_inches = if_else(
      height_inches >= 0 & height_inches < 12, height_inches, NA_real_
    ),
    # Weight (kept in pounds) must be over 20 and under 1400 pounds
    # (heaviest person in the world)
    weight = if_else(weight > 20 & weight < 1400, weight, NA_real_),
    weight_2015 = if_else(
      weight_2015 > 20 & weight_2015 < 1400, weight_2015, NA_real_
    )
  ) %>%
  mutate(
    # BMI in the present and in 2015; both use the same reported height
    bmi = compute_bmi(weight, height_feet, height_inches),
    bmi_2015 = compute_bmi(weight_2015, height_feet, height_inches),
    # Change in BMI from 2015 to the present, as present minus 2015, so that
    # a gain is positive. This is the same "now minus then" direction used by
    # the other change variables in this file; the earlier `bmi_2015 - bmi`
    # had the sign inverted. NA if either BMI is missing.
    bmi_change_2015_2020 = bmi - bmi_2015
  )

# The BMI helper is only needed in this chunk
remove(compute_bmi)
```

## 1.6 Age

```{r, message = FALSE}
# Respondents typed either their age or their birth year into AGE.
# Normalise both kinds of answer to a birth year, then derive age from it.
dat <- dat %>%
  mutate(
    birthyear = case_when(
      # Fewer than 4 characters: treat as an age and subtract from 2020
      nchar(AGE) < 4 ~ 2020 - as.numeric(AGE),
      # Otherwise a value before 1880 is treated as an invalid response
      as.numeric(AGE) < 1880 ~ NA_real_,
      # Anything else is kept as the birth year
      # (NA if it is not a number)
      TRUE ~ as.numeric(AGE)
    ),
    # Age as of 2020
    age = 2020 - birthyear
  ) %>%
  # The raw answer is no longer needed
  select(-AGE)
```  



## 1.7 Diagnoses

```{r, message = FALSE, warning = FALSE}

# Clean a self-reported "age at diagnosis" answer: keep it if it is a number
# below 125, otherwise return NA.
# `birthyear` is not used; it is kept only so the signature is unchanged.
recode_conditions <- function(data, birthyear) {
  age_diagnosed <- as.numeric(data)
  if_else(age_diagnosed < 125, age_diagnosed, NA_real_)
}

# Classify whether a condition was diagnosed during or after 2016.
#   answer:        "Yes"/"No" response to whether they were ever diagnosed
#   age_diagnosed: cleaned age at diagnosis
#   birthyear:     respondent's birth year
# Returns "Yes" for a diagnosis in 2016 or later, "No" if the respondent
# answered "No", and NA otherwise (including a "Yes" whose diagnosis year is
# before 2016 or cannot be computed).
diagnosed_since_2016 <- function(answer, age_diagnosed, birthyear) {
  if_else(
    answer == "Yes" & (age_diagnosed + birthyear) >= 2016, "Yes",
    if_else(answer == "No", "No", NA_character_)
  )
}

dat <- dat %>%
  rename(
    # Indicators for whether or not they were diagnosed
    condition_high_blood_pressure = CONDITION_1,
    condition_high_cholesterol = CONDITION_2,
    condition_depression = CONDITION_3,
    condition_anxiety = CONDITION_4,
    condition_diabetes = CONDITION_5,
    condition_sleep_disorder = CONDITION_6,
    condition_heart_attack = CONDITION_7,
    # Age at diagnosis
    condition_high_blood_pressure_age = CONDITION_HBP,
    condition_high_cholesterol_age = CONDITION_HC,
    condition_depression_age = CONDITION_DEP,
    condition_anxiety_age = CONDITION_ANX,
    condition_diabetes_age = CONDITION_DIAB,
    condition_sleep_disorder_age = CONDITION_SLEEP,
    condition_heart_attack_age = CONDITION_HA
  ) %>%
  # Clean the ages at diagnosis
  mutate_at(
    vars(
      "condition_high_blood_pressure_age",
      "condition_high_cholesterol_age",
      "condition_depression_age",
      "condition_anxiety_age",
      "condition_diabetes_age",
      "condition_sleep_disorder_age",
      "condition_heart_attack_age"
    ),
    recode_conditions
  ) %>%
  # Indicate whether they got each diagnosis during/after 2016 or not
  mutate(
    condition_high_blood_pressure = diagnosed_since_2016(
      condition_high_blood_pressure, condition_high_blood_pressure_age, birthyear
    ),
    condition_high_cholesterol = diagnosed_since_2016(
      condition_high_cholesterol, condition_high_cholesterol_age, birthyear
    ),
    condition_depression = diagnosed_since_2016(
      condition_depression, condition_depression_age, birthyear
    ),
    condition_anxiety = diagnosed_since_2016(
      condition_anxiety, condition_anxiety_age, birthyear
    ),
    condition_sleep_disorder = diagnosed_since_2016(
      condition_sleep_disorder, condition_sleep_disorder_age, birthyear
    ),
    condition_heart_attack = diagnosed_since_2016(
      condition_heart_attack, condition_heart_attack_age, birthyear
    ),

    # Diabetes is built from the diabetes answer and diabetes age at
    # diagnosis. (It was previously computed from the heart-attack columns,
    # a copy-paste slip that made it a function of heart attacks.)
    condition_diabetes = if_else(
      # Diagnosed during or after 2016, and not only during pregnancy
      condition_diabetes == "Yes" &
        (condition_diabetes_age + birthyear) >= 2016 &
        DIAB_PREG == "No", "Yes",
      # Any other valid answer is "No" rather than NA: a "Yes" could have
      # referred to diabetes during pregnancy only.
      # NOTE(review): when DIAB_PREG is missing for a respondent who reported
      # diabetes in 2016 or later, the result is NA -- confirm that is intended.
      if_else(condition_diabetes %in% c("Yes", "No"), "No", NA_character_)
    )
  )

# The helpers are only needed in this chunk
remove(recode_conditions, diagnosed_since_2016)
```




## 1.8 Change in Health Conditions
```{r, message = FALSE, warning = FALSE}
library(tidyverse)

# Convert a "less now ... more now" response label into a score from -2 to 2.
recode_change_conditions <- function(data) {
  dplyr::recode(
    data,
    "Much less now" = -2,
    "Less now" = -1,
    "No change" = 0,
    "More now" = 1,
    "Much more now" = 2
  )
}

dat <- dat %>%
  # Give the change-in-behaviour and smoking items descriptive names
  rename(
    chg_sleep_quality = CHG_SLEEP,
    chg_exercise_frequency = CHG_EXERCISE,
    chg_smoke_frequency = CHG_SMOKE,
    chg_stress_amount = CHG_STRESS,
    smoking_100 = SMOKEA,
    smoking_frequency = SMOKEB
  ) %>%
  # Score the four change items
  mutate_at(
    c(
      "chg_sleep_quality",
      "chg_exercise_frequency",
      "chg_smoke_frequency",
      "chg_stress_amount"
    ),
    recode_change_conditions
  ) %>%
  mutate(
    # High risk from smoking: smoked at least 100 cigarettes and smokes
    # every day
    high_risk_from_smoking = case_when(
      # Not applicable unless both questions were answered
      is.na(smoking_100) | is.na(smoking_frequency) ~ NA_character_,
      smoking_100 == "Yes" & smoking_frequency == "every day" ~ "yes",
      TRUE ~ "no"
    ),
    # Others use a broader definition of smoking, which counts smoking
    # every day or even just some days
    # https://www.shadac.org/news/brfss-spotlight-series-adult-smoking-and-e-cigarette-use-united-states-infographic
    risk_from_smoking = case_when(
      # Not applicable unless both questions were answered
      is.na(smoking_100) | is.na(smoking_frequency) ~ NA_character_,
      smoking_100 == "Yes" &
        smoking_frequency %in% c("every day", "some days") ~ "yes",
      TRUE ~ "no"
    )
  )
```


## 1.9 Physical Activity

```{r, message = FALSE}
# Build weekly minutes of moderate and vigorous physical activity, plus a
# yes/no indicator built from the two.
dat <- dat %>%
  # Give the activity items descriptive names
  rename(
    moderate_physical_activity_unemployed = MOD1A,
    moderate_physical_activity_employed = MOD1B,
    moderate_physical_activity_days = MOD2_1,
    moderate_physical_activity_min = MOD3_1_NaN,
    moderate_physical_activity_hours = MOD3_1_undefined,
    vigorous_physical_activity_unemployed = VIG1A,
    vigorous_physical_activity_employed = VIG1B,
    vigorous_physical_activity_days = VIG2_1,
    vigorous_physical_activity_min = VIG3_1_NaN,
    vigorous_physical_activity_hours = VIG3_1_undefined
  ) %>%
  # Merge the employed and unemployed versions of each question,
  # taking the employed answer first when it is present
  mutate(
    moderate_physical_activity = coalesce(
      moderate_physical_activity_employed,
      moderate_physical_activity_unemployed
    ),
    vigorous_physical_activity = coalesce(
      vigorous_physical_activity_employed,
      vigorous_physical_activity_unemployed
    )
  ) %>%
  select(
    -moderate_physical_activity_employed,
    -moderate_physical_activity_unemployed,
    -vigorous_physical_activity_employed,
    -vigorous_physical_activity_unemployed
  ) %>%
  # Days, minutes and hours become numeric
  mutate_at(
    c(
      "moderate_physical_activity_days", "vigorous_physical_activity_days",
      "moderate_physical_activity_min", "moderate_physical_activity_hours",
      "vigorous_physical_activity_min", "vigorous_physical_activity_hours"
    ),
    as.numeric
  ) %>%
  mutate(
    # Minutes must be under 60 and hours under 24; otherwise NA
    moderate_physical_activity_min = if_else(
      moderate_physical_activity_min < 60,
      moderate_physical_activity_min, NA_real_
    ),
    moderate_physical_activity_hours = if_else(
      moderate_physical_activity_hours < 24,
      moderate_physical_activity_hours, NA_real_
    ),
    vigorous_physical_activity_min = if_else(
      vigorous_physical_activity_min < 60,
      vigorous_physical_activity_min, NA_real_
    ),
    vigorous_physical_activity_hours = if_else(
      vigorous_physical_activity_hours < 24,
      vigorous_physical_activity_hours, NA_real_
    ),

    # Single duration in minutes: whichever of hours and minutes is present
    # is counted, and the result is NA only when both are missing
    moderate_physical_activity_time = if_else(
      is.na(moderate_physical_activity_hours) &
        is.na(moderate_physical_activity_min),
      NA_real_,
      coalesce(moderate_physical_activity_hours, 0) * 60 +
        coalesce(moderate_physical_activity_min, 0)
    ),
    vigorous_physical_activity_time = if_else(
      is.na(vigorous_physical_activity_hours) &
        is.na(vigorous_physical_activity_min),
      NA_real_,
      coalesce(vigorous_physical_activity_hours, 0) * 60 +
        coalesce(vigorous_physical_activity_min, 0)
    )
  ) %>%
  # The separate hour and minute fields are no longer needed
  select(
    -vigorous_physical_activity_hours, -vigorous_physical_activity_min,
    -moderate_physical_activity_hours, -moderate_physical_activity_min
  ) %>%
  # Time spent per week = days * minutes, for respondents who answered "Yes".
  # A missing days or time value makes the product NA, and any other answer
  # gives NA as well.
  mutate(
    moderate_physical_activity = if_else(
      moderate_physical_activity == "Yes",
      moderate_physical_activity_days * moderate_physical_activity_time,
      NA_real_
    ),
    vigorous_physical_activity = if_else(
      vigorous_physical_activity == "Yes",
      vigorous_physical_activity_days * vigorous_physical_activity_time,
      NA_real_
    )
  ) %>%
  # Categorical measure
  mutate(recommended_physical_activity = case_when(
    # If neither measure has a valid response, set to NA
    is.na(moderate_physical_activity) &
      is.na(vigorous_physical_activity) ~ NA_character_,
    # Otherwise add what is there (a missing measure counts as 0), with
    # moderate minutes counting half. 75 or more is "yes"; under is "no".
    coalesce(moderate_physical_activity, 0) / 2 +
      coalesce(vigorous_physical_activity, 0) >= 75 ~ "yes",
    TRUE ~ "no"
  )) %>%
  select(
    -vigorous_physical_activity_days, -vigorous_physical_activity_time,
    -moderate_physical_activity_days, -moderate_physical_activity_time
  )
```
    
## 1.10 Perceived Polarization Variables

```{r, message = FALSE}
# Estimate Perceived Polarization & Partisan Gap
# From the placement items for oneself and for the average / Republican /
# Democratic voter and elected official (state and US, present and 2016),
# derive: distance between self and the average, the Republican-Democrat
# gap, and the change in each between 2016 and the present.
dat <- dat %>%
  # rename to easier names
  rename(
    chg_political_differences = CHG_POLDIFF,
    self = SELF_1_1,
    self_2016 = SELF_2016_1_1,

    # Voters
    state_voter_avg = STATE_VOTER_1_1,
    state_voter_rep = STATE_VOTER_2_1,
    state_voter_dem = STATE_VOTER_3_1,

    state_voter_avg_2016 = STATE_VOTER_2016_1_1,
    state_voter_rep_2016 = STATE_VOTER_2016_2_1,
    state_voter_dem_2016 = STATE_VOTER_2016_3_1,

    us_voter_avg = US_VOTER_1_1,
    us_voter_rep = US_VOTER_2_1,
    us_voter_dem = US_VOTER_3_1,

    us_voter_avg_2016 = US_VOTER_2016_1_1,
    us_voter_rep_2016 = US_VOTER_2016_2_1,
    us_voter_dem_2016 = US_VOTER_2016_3_1,

    # Elected Officials
    state_elect_avg = STATE_ELECT_1_1,
    state_elect_rep = STATE_ELECT_2_1,
    state_elect_dem = STATE_ELECT_3_1,

    state_elect_avg_2016 = STATE_ELECT_2016_1_1,
    state_elect_rep_2016 = STATE_ELECT_2016_2_1,
    state_elect_dem_2016 = STATE_ELECT_2016_3_1,

    us_elect_avg = US_ELECT_1_1,
    us_elect_rep = US_ELECT_2_1,
    us_elect_dem = US_ELECT_3_1,

    us_elect_avg_2016 = US_ELECT_2016_1_1,
    us_elect_rep_2016 = US_ELECT_2016_2_1,
    us_elect_dem_2016 = US_ELECT_2016_3_1
  ) %>%
  # Change in Perceived Political Differences between Parties:
  # -2 (much less now) to 2 (much greater now)
  mutate(chg_political_differences = dplyr::recode(
    chg_political_differences,
    "much greater now" = 2,
    "greater now" = 1,
    "about the same" = 0,
    "less now" = -1,
    "much less now" = -2
  )) %>%
  # Convert scale measurements to numeric
  mutate_at(
    vars(
      "self", "self_2016",
      "state_voter_avg", "state_voter_rep", "state_voter_dem",
      "us_voter_avg", "us_voter_rep", "us_voter_dem",
      "state_elect_avg", "state_elect_rep", "state_elect_dem",
      "us_elect_avg", "us_elect_rep", "us_elect_dem",
      "state_voter_avg_2016", "state_voter_rep_2016", "state_voter_dem_2016",
      "us_voter_avg_2016", "us_voter_rep_2016", "us_voter_dem_2016",
      "state_elect_avg_2016", "state_elect_rep_2016", "state_elect_dem_2016",
      "us_elect_avg_2016", "us_elect_rep_2016", "us_elect_dem_2016"
    ),
    as.numeric
  ) %>%
  # Perceived Difference between Self and Average (present & 2016).
  # Each 2016 distance pairs the 2016 self-placement (`self_2016`) with the
  # 2016 placement of the average. These previously used present-day `self`,
  # which mixed the two time points and left `self_2016` unused.
  # NOTE(review): confirm that 2016 distances are meant to use `self_2016`.
  mutate(
    # Average Voter in State
    diff_self_state_voter_avg = abs(self - state_voter_avg),
    diff_self_state_voter_avg_2016 = abs(self_2016 - state_voter_avg_2016),
    # Average Elected Official in State
    diff_self_state_elect_avg = abs(self - state_elect_avg),
    diff_self_state_elect_avg_2016 = abs(self_2016 - state_elect_avg_2016),
    # Average Voter in US
    diff_self_us_voter_avg = abs(self - us_voter_avg),
    diff_self_us_voter_avg_2016 = abs(self_2016 - us_voter_avg_2016),
    # Average Elected Official in US
    diff_self_us_elect_avg = abs(self - us_elect_avg),
    diff_self_us_elect_avg_2016 = abs(self_2016 - us_elect_avg_2016)
  ) %>%
  # Difference in Differences between Self & Average today versus in 2016
  # (present minus 2016)
  mutate(
    # Average state voter
    chg_diff_self_state_voter_avg =
      diff_self_state_voter_avg - diff_self_state_voter_avg_2016,
    # Average state elected official
    chg_diff_self_state_elect_avg =
      diff_self_state_elect_avg - diff_self_state_elect_avg_2016,
    # Average US voter
    chg_diff_self_us_voter_avg =
      diff_self_us_voter_avg - diff_self_us_voter_avg_2016,
    # Average US elected official
    chg_diff_self_us_elect_avg =
      diff_self_us_elect_avg - diff_self_us_elect_avg_2016
  ) %>%
  # Perceived Partisan Gap between Republicans and Democrats
  mutate(
    # state voters
    partisan_gap_state_voter = abs(state_voter_rep - state_voter_dem),
    partisan_gap_state_voter_2016 =
      abs(state_voter_rep_2016 - state_voter_dem_2016),
    # us voters
    partisan_gap_us_voter = abs(us_voter_rep - us_voter_dem),
    partisan_gap_us_voter_2016 = abs(us_voter_rep_2016 - us_voter_dem_2016),
    # state elected officials
    partisan_gap_state_elect = abs(state_elect_rep - state_elect_dem),
    partisan_gap_state_elect_2016 =
      abs(state_elect_rep_2016 - state_elect_dem_2016),
    # us elected officials
    partisan_gap_us_elect = abs(us_elect_rep - us_elect_dem),
    partisan_gap_us_elect_2016 = abs(us_elect_rep_2016 - us_elect_dem_2016)
  ) %>%
  # Change in Perceived Partisan Gap between Republicans and Democrats
  # (present minus 2016)
  mutate(
    # Average state voter
    chg_partisan_gap_state_voter =
      partisan_gap_state_voter - partisan_gap_state_voter_2016,
    # Average US voter
    chg_partisan_gap_us_voter =
      partisan_gap_us_voter - partisan_gap_us_voter_2016,
    # Average state elected official
    chg_partisan_gap_state_elect =
      partisan_gap_state_elect - partisan_gap_state_elect_2016,
    # Average US elected official
    chg_partisan_gap_us_elect =
      partisan_gap_us_elect - partisan_gap_us_elect_2016
  )

```


## 1.11 Voting, Party Identification, and Ideology


```{r}
# Checkpoint: write the dataset built so far to disk, then load it back in
saveRDS(dat, file = "dataset_in_progress.rds")

dat <- read_rds("dataset_in_progress.rds")
```


```{r}
# Voting history, party identification, ideology and favorability:
# rename the raw items, build a seven-point party identification scale and a
# three-way party indicator, and compute favorability gaps.
dat <- dat %>%
  # PARTY_A, PARTY_B, PARTY_C and PARTY_D keep their raw names
  rename(
    voter_registration = VOTEREG,
    vote_2008 = VOTE_2008_CIVPARTA,
    vote_2012 = VOTE_2012_CIVPARTA,
    vote_2016 = VOTE_2016_CIVPARTA,
    ideology_5 = IDEO,
    favor_democrats = FAVOR_1_undefined,
    favor_republicans = FAVOR_2_undefined,
    favor_liberals = FAVOR_3_undefined,
    favor_conservatives = FAVOR_4_undefined
  ) %>%
  mutate(
    # Seven-category party identification label. Partisans are split by the
    # strength follow-up (PARTY_C for Republicans, PARTY_B for Democrats);
    # everyone answering Independent / Not Sure / Other is placed by the
    # party they lean toward (PARTY_D). Anything else is NA.
    party_id = case_when(
      PARTY_A == "Republican" &
        PARTY_C == "Strong Republican" ~ "Strong Republican",
      PARTY_A == "Republican" &
        PARTY_C == "Not very strong Republican" ~ "Weak Republican",
      PARTY_A %in% c("Independent", "Not Sure", "Other") &
        PARTY_D == "Republican Party" ~ "Leans Republican",
      PARTY_A %in% c("Independent", "Not Sure", "Other") &
        PARTY_D == "Neither" ~ "Independent",
      PARTY_A %in% c("Independent", "Not Sure", "Other") &
        PARTY_D == "Democratic Party" ~ "Leans Democrat",
      PARTY_A == "Democrat" &
        PARTY_B == "Not very strong Democrat" ~ "Weak Democrat",
      PARTY_A == "Democrat" &
        PARTY_B == "Strong Democrat" ~ "Strong Democrat",
      TRUE ~ NA_character_
    ),
    # Seven-point scale from most Democrat (1) to most Republican (7):
    # the position of the label in this ordered list
    party_7 = as.numeric(match(
      party_id,
      c(
        "Strong Democrat", "Weak Democrat", "Leans Democrat",
        "Independent",
        "Leans Republican", "Weak Republican", "Strong Republican"
      )
    )),
    # Simple three-part indicator of republican/democrat/independent,
    # with leaners counted as partisans
    party_3 = case_when(
      party_7 >= 5 ~ "Republican",
      party_7 == 4 ~ "Independent",
      party_7 <= 3 ~ "Democrat",
      TRUE ~ NA_character_
    )
  ) %>%
  # Favorability ratings become numeric
  mutate_at(
    c(
      "favor_democrats", "favor_republicans",
      "favor_liberals", "favor_conservatives"
    ),
    as.numeric
  ) %>%
  # Absolute gap in favorability between the two parties and between the
  # two ideological groups
  mutate(
    favor_party_gap = abs(favor_republicans - favor_democrats),
    favor_ideology_gap = abs(favor_conservatives - favor_liberals)
  )

```


## 1.12 Affective Polarization


```{r, message = FALSE}
# Affective Polarization by Word Association

# Count, for each respondent, how many of the supplied word-association
# columns mention `party`.
#   party: string searched for in each response (e.g. "Republican").
#   ...:   character vectors of equal length, one per word.
# Returns an integer vector with one count per respondent.
# A word the respondent left blank (NA) counts as "not attributed to this
# party". Previously a single blank word turned the whole sum into NA, which
# was then overwritten with 0, throwing away every other word the respondent
# did answer.
count_party_mentions <- function(party, ...) {
  mentioned <- lapply(
    list(...),
    function(word) coalesce(str_detect(word, party), FALSE)
  )
  Reduce(`+`, mentioned)
}

dat <- dat %>%
  # rename to easier names
  rename(wordassoc_patriotic = `WORDASSOC#1_1`,
         wordassoc_intelligent = `WORDASSOC#1_3`,
         wordassoc_honest = `WORDASSOC#1_5`,
         wordassoc_generous = `WORDASSOC#1_7`,
         wordassoc_open_minded = `WORDASSOC#1_8`,
         wordassoc_closed_minded = `WORDASSOC#1_2`,
         wordassoc_hypocritical = `WORDASSOC#1_4`,
         wordassoc_selfish = `WORDASSOC#1_6`,
         wordassoc_mean = `WORDASSOC#1_9`) %>%
  # Temporary per-party tallies: 5 positive words and 4 negative words
  mutate(
    tmp_positive_rep = count_party_mentions(
      "Republican",
      wordassoc_patriotic, wordassoc_intelligent, wordassoc_honest,
      wordassoc_generous, wordassoc_open_minded),
    tmp_positive_dem = count_party_mentions(
      "Democrat",
      wordassoc_patriotic, wordassoc_intelligent, wordassoc_honest,
      wordassoc_generous, wordassoc_open_minded),
    tmp_negative_rep = count_party_mentions(
      "Republican",
      wordassoc_closed_minded, wordassoc_hypocritical,
      wordassoc_selfish, wordassoc_mean),
    tmp_negative_dem = count_party_mentions(
      "Democrat",
      wordassoc_closed_minded, wordassoc_hypocritical,
      wordassoc_selfish, wordassoc_mean)) %>%
  # Assign the tallies to "same party" / "other party" by party ID:
  # party_7 in 5:7 are treated as Republicans, 1:3 as Democrats;
  # everyone else (independents, missing) is logged as NA.
  mutate(
    # How many positive words did they attribute to their own party?
    same_party_positive = case_when(
      party_7 %in% 5:7 ~ tmp_positive_rep,
      party_7 %in% 1:3 ~ tmp_positive_dem,
      TRUE ~ NA_integer_),
    # How many positive words did they attribute to the other party?
    diff_party_positive = case_when(
      party_7 %in% 5:7 ~ tmp_positive_dem,
      party_7 %in% 1:3 ~ tmp_positive_rep,
      TRUE ~ NA_integer_),
    # How many NEGATIVE words did they attribute to THEIR party?
    same_party_negative = case_when(
      party_7 %in% 5:7 ~ tmp_negative_rep,
      party_7 %in% 1:3 ~ tmp_negative_dem,
      TRUE ~ NA_integer_),
    # How many NEGATIVE words did they attribute to the OTHER party?
    diff_party_negative = case_when(
      party_7 %in% 5:7 ~ tmp_negative_dem,
      party_7 %in% 1:3 ~ tmp_negative_rep,
      TRUE ~ NA_integer_)) %>%
  # Drop the temporary tallies so they are not carried into later steps
  select(-tmp_positive_rep, -tmp_positive_dem,
         -tmp_negative_rep, -tmp_negative_dem) %>%
  # Partisans who registered NO words now already have a count of 0 (blank
  # words are counted as "not mentioned"), so no separate NA-to-0 step is
  # needed; non-partisans stay NA.
  # Since there are 5 positive words and 4 negative words,
  # we need to rescale these to a common 0-to-1 measure.
  # NOTE(review): rescale() stretches each measure by its OBSERVED min/max,
  # not the theoretical 0-5 / 0-4 range - confirm this is intended.
  mutate(across(
    c(same_party_positive, diff_party_positive,
      same_party_negative, diff_party_negative),
    ~ scales::rescale(as.numeric(.x), to = c(0, 1)))) %>%
  # Calculate affective polarization:
  # HOW MUCH GREATER is your net positivity about your own party
  # than about the opposing party?
  # Each component lies in [0, 1], so the result ranges from -2 to 2,
  # where higher values = more affective polarization.
  # Based on how many more good things than bad you say about your party
  mutate(affective_polarization_words = (same_party_positive - same_party_negative) -
           # Minus how many more good things than bad you say about the opposing party
           (diff_party_positive - diff_party_negative))
```

## 1.13 Party You Would Never Have Voted For

```{r, message = FALSE}
# Give the WOULDNEVER_* and IFMARRIED_* survey items descriptive names.
dat <- rename(
  dat,
  would_vote_2016 = WOULDNEVER_1,
  never_vote_2016 = WOULDNEVER_2,
  would_vote_2020 = WOULDNEVER_3,
  never_vote_2020 = WOULDNEVER_4,
  ifmarried_rep = IFMARRIED_1,
  ifmarried_dem = IFMARRIED_2
)
```



## 1.14 Echo Chamber Variables

```{r, message = FALSE}
# Echo chamber variables: how similar friends' political views are, which
# social media platforms the respondent uses, and whether they block people.
dat <- dat %>%
  rename(friends_views_same = FRIENDSA,
         friendship_ends_because_politics = FRIENDSB,
         uses_socialmedia = SOCIALMEDIAA,
         blocks_on_social_media = SOCIALMEDIAB) %>%
  mutate(
    # Ordinal coding: 2 = most friends share views, 0 = none;
    # "don't know" is treated as missing
    friends_views_same = dplyr::recode(
      friends_views_same,
      "Most of my close friends share my views on government and politics" = 2,
      "Some of my close friends share my views, but many do not" = 1,
      "None of my close friends share my views." = 0,
      "I don't really know what most of my close friends think about government and politics" = NA_real_)) %>%
  mutate(
    # If a social media service is detected in their response, indicate yes
    facebook = if_else(str_detect(uses_socialmedia, pattern = "Facebook"), "yes", "no"),
    linkedin = if_else(str_detect(uses_socialmedia, pattern = "LinkedIn"), "yes", "no"),
    twitter = if_else(str_detect(uses_socialmedia, pattern = "Twitter"), "yes", "no"),
    googleplus = if_else(str_detect(uses_socialmedia, pattern = "Google Plus"), "yes", "no")) %>%
  mutate(
    # If they use a social media site AND block on it, indicate yes.
    # The parentheses matter: `&` binds tighter than `|`, so without them
    # any Facebook/LinkedIn/Twitter user was coded "Yes" regardless of
    # their answer to the blocking question.
    blocks_on_social_media = if_else(
      (facebook == "yes" |
         linkedin == "yes" |
         twitter == "yes" |
         googleplus == "yes") &
        blocks_on_social_media == "Yes",
      "Yes",
      # Otherwise keep an explicit "No"; anything else becomes missing
      if_else(blocks_on_social_media == "No", "No", NA_character_)))
  
```

## 1.15 Social Capital


Next, we calculate social capital indices. Each index relies on available data; some indices, like the social trust index, have a scattering of missing data throughout, meaning that even though only a small number of responses were missing relative to the whole, the number of respondents for whom indices cannot be calculated is much greater. To get around this, we use multiple imputation to fill in these responses, drawing from latent patterns in the data. To maximize the amount of data used to make inferences, we calculate components for each index, and then run multiple imputation on them together all at once.


### 1. Organized Group Interactions Index 

First, we calculate the Organized Group Interactions Index, based on how often you attend public meetings, club meetings, and local community events.

```{r}
# Min-max rescale a numeric vector to [0, 1], ignoring missing values.
# NAs in `x` stay NA in the result.
standardize <- function(x) {
  (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

# Component items of the Organized Group Interactions Index
organized_items <- c("attended_club_meetings",
                     "attended_public_meeting",
                     "attended_local_community_events")

# Calculate Organized Group Interactions Index
dat <- dat %>%
  rename(
    # Identify and rename component parts for readability
    attended_club_meetings = GROUPINF1B_1,
    attended_public_meeting = GROUPINF1C_3,
    attended_local_community_events = GROUPINF1A_1) %>%
  # Recode the frequency labels into numeric frequency scores
  mutate(across(
    all_of(organized_items),
    ~ dplyr::recode(
      .x,
      "Never did this" = 0,
      "Once" = 1,
      "A few times" = 2,
      "2-4 times" = 3,
      "5-9 times" = 7,
      "about once a month on average" = 12,
      "about twice a month" = 24,
      "about once a week on average" = 52,
      "more than once a week" = 60))) %>%
  # Tally, per respondent, how many of the items were answered
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(organized_items)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(all_of(organized_items), standardize)) %>%
  # If there are enough valid responses (at least 2),
  # calculate the index by averaging the answered items
  mutate(organized_group_interactions_index = if_else(
    valid_responses >= 2,
    rowSums(across(all_of(organized_items)), na.rm = TRUE) / valid_responses,
    NA_real_))
```

### 2. Informal Social Interactions Index

Next, we create the Informal Social Interactions Index, based on how often you visit with relatives, socialize with co-workers outside of work, hang out with friends in public places, and play cards and board games. The original measure also included how often you have friends visit home; this was excluded by mistake in our survey.

```{r}
# Component items of the Informal Social Interactions Index
informal_items <- c("played_games", "visited_relatives",
                    "socialized_with_coworkers", "hung_out_in_public")

# Calculate Informal Social Interactions Index.
# Relies on standardize(), defined earlier in this file.
dat <- dat %>%
  # Identify and rename component parts for readability
  rename(played_games = GROUPINF1A_3,
         visited_relatives = GROUPINF1A_4,
         socialized_with_coworkers = GROUPINF1B_3,
         hung_out_in_public = GROUPINF1B_4) %>%
  # Recode the frequency labels into numeric frequency scores
  mutate(across(
    all_of(informal_items),
    ~ dplyr::recode(
      .x,
      "Never did this" = 0,
      "Once" = 1,
      "A few times" = 2,
      "2-4 times" = 3,
      "5-9 times" = 7,
      "about once a month on average" = 12,
      "about twice a month" = 24,
      "about once a week on average" = 52,
      "more than once a week" = 60))) %>%
  # Tally, per respondent, how many of the items were answered
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(informal_items)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(all_of(informal_items), standardize)) %>%
  # If there are enough valid responses (at least 2),
  # calculate the index by averaging the answered items
  mutate(informal_social_interactions_index = if_else(
    valid_responses >= 2,
    rowSums(across(all_of(informal_items)), na.rm = TRUE) / valid_responses,
    NA_real_))


```

### 3. Formal Group Involvement Index

Next, we calculate the Formal Group Involvement Index, which is the total number of groups they participate in, divided by the number of groups that were applicable to them. Responses to at least 2 groups were required for the measure to be calculated. 

```{r, message = FALSE}
# specify variables: all 18 items in the GROUPFOR question series
myvars <- paste0("GROUPFOR_", 1:18)

# Calculate Formal Group Involvement Index.
# Relies on standardize(), defined earlier in this file.
dat <- dat %>%
  # Recode Yes/No membership answers to 1/0
  mutate(across(
    all_of(myvars),
    ~ dplyr::recode(
      .x,
      "Yes" = 1,
      "No" = 0))) %>%
  # Tally, per respondent, how many of the 18 items were answered
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(myvars)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(all_of(myvars), standardize)) %>%
  # If there are enough valid responses (at least 2),
  # calculate the index as the share of answered groups they participate in
  mutate(formal_group_involvement_index = if_else(
    valid_responses >= 2,
    rowSums(across(all_of(myvars)), na.rm = TRUE) / valid_responses,
    NA_real_))
```


### 4. Social Trust Index

Next, we build the Social Trust Index, which combines general interpersonal trust, and how much you trust neighbors, trust co-workers, trust fellow congregants, trust store employees where you shop, and trust local police. At least three of these answers had to be provided for a score to be calculated. The index is calculated as the mean of the standardized responses to these six questions. (Originally, they used national norms to standardize.)

```{r}
# Min-max rescale a numeric vector to [0, 1], ignoring missing values.
# NAs in `x` stay NA in the result.
standardize <- function(x) {
  (x - min(x, na.rm = TRUE)) / (max(x, na.rm = TRUE) - min(x, na.rm = TRUE))
}

# Trust items that share the "Trust them ..." response scale
specific_trust_items <- c("trust_neighbors", "trust_coworkers",
                          "trust_coreligious", "trust_shops", "trust_police")
# All six components of the Social Trust Index
social_trust_items <- c("trust_general", specific_trust_items)

# Calculate the social trust index!
dat <- dat %>%
  # Copy (rather than rename) the raw items, so the originals stay available
  mutate(trust_general = TRUST,
         trust_neighbors = ITRUST_1,
         trust_coworkers = ITRUST_2,
         trust_coreligious = ITRUST_3,
         trust_shops = ITRUST_4,
         trust_police = ITRUST_6) %>%
  # Recode trust_general
  mutate(
    trust_general = dplyr::recode(
      trust_general,
      "You can't be too careful" = 0,
      "Depends" = 1,
      "People can be trusted" = 2)) %>%
  # Recode the group-specific trust responses; "Does not apply" is missing
  mutate(across(
    all_of(specific_trust_items),
    ~ dplyr::recode(
      .x,
      "Trust them a lot" = 3,
      "Trust them some" = 2,
      "Trust them only a little" = 1,
      "Trust them not at all" = 0,
      "Does not apply" = NA_real_))) %>%
  # Tally, per respondent, how many of the six items were answered
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(social_trust_items)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(all_of(social_trust_items), standardize)) %>%
  # If there are enough valid responses (at least 3),
  # calculate a social trust score by averaging the answered indicators
  mutate(social_trust_index = if_else(
    valid_responses >= 3,
    rowSums(across(all_of(social_trust_items)), na.rm = TRUE) / valid_responses,
    NA_real_))
```

### 5. Composite Racial Group Trust Index

```{r, message = FALSE}

# Components of the Composite Racial Group Trust Index
racial_trust_items <- c("trust_white", "trust_black",
                        "trust_asian", "trust_hisplat")

# Calculate the Composite Racial Group Trust Index.
# Relies on standardize(), defined earlier in this file.
dat <- dat %>%
  # Copy (rather than rename) the raw items, so the originals stay available
  mutate(
    multirace = RACE1,
    trust_white = ITRUST_7,
    trust_black = ITRUST_8,
    trust_asian = ITRUST_9,
    trust_hisplat = ITRUST_10) %>%
  # Recode race-based trust; "Does not apply" is missing
  mutate(across(
    all_of(racial_trust_items),
    ~ dplyr::recode(
      .x,
      "Trust them a lot" = 3,
      "Trust them some" = 2,
      "Trust them only a little" = 1,
      "Trust them not at all" = 0,
      "Does not apply" = NA_real_))) %>%
  # We are only interested in people's trust of members of DIFFERENT racial
  # groups, so blank out any item about a group the respondent identifies
  # with (e.g. a white respondent's answer about white residents)
  mutate(trust_white = if_else(str_detect(multirace, "White"), NA_real_, trust_white),
         trust_black = if_else(str_detect(multirace, "Black"), NA_real_, trust_black),
         trust_asian = if_else(str_detect(multirace, "Asian|Middle Eastern"),
                               NA_real_, trust_asian),
         trust_hisplat = if_else(str_detect(multirace, "Hispanic"), NA_real_, trust_hisplat)) %>%
  # Tally, per respondent, how many items remain valid
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(racial_trust_items)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(all_of(racial_trust_items), standardize)) %>%
  # If there are enough valid responses (at least 3),
  # calculate the racial group trust score by averaging the valid indicators
  mutate(composite_racial_group_trust_index = if_else(
    valid_responses >= 3,
    rowSums(across(all_of(racial_trust_items)), na.rm = TRUE) / valid_responses,
    NA_real_))


```




### 6. Diversity of Friendship Network Index

Next, we gather the Diversity of Friendship Network Index, which counts how many of 11 different types of personal friends you have. 

```{r, message = FALSE}

# The 16 DIVRSITY items that enter the index
diversity_items <- paste0("DIVRSITY_", 1:16)

# Load in and recode variables.
# Relies on standardize(), defined earlier in this file.
dat <- dat %>%
  # This measure uses the items in the DIVRSITY question series:
  # recode Yes/No to 1/0; anything else becomes missing
  mutate(across(
    contains("DIVRSITY"),
    ~ case_when(.x == "Yes" ~ 1,
                .x == "No" ~ 0,
                TRUE ~ NA_real_))) %>%
  # Tally, per respondent, how many of the 16 items were answered
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(diversity_items)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(contains("DIVRSITY"), standardize)) %>%
  # If there are enough valid responses (at least 2),
  # calculate a diversity index by averaging the answered indicators
  mutate(diversity_friendship_network_index = if_else(
    valid_responses >= 2,
    rowSums(across(all_of(diversity_items)), na.rm = TRUE) / valid_responses,
    NA_real_))
```


### 7. Civic Participation Index

Next, we collect the civic participation index, which counts whether you voted in the last election, signed a petition, attended a political meeting or rally, worked on a community project, or participated in a protest/demonstration/boycott/march in the last twelve months.

```{r, message = FALSE}
# Yes/No civic activity items from the CIVPARTB question series
civic_activity_items <- c("signed_petition", "attend_political_meeting",
                          "worked_community_project", "attend_protest")
# All five components of the Civic Participation Index
civic_items <- c("voted_in_2016", civic_activity_items)

# Calculate the Civic Participation Index.
# Relies on standardize(), defined earlier in this file.
dat <- dat %>%
  # Identify and rename component parts for readability
  rename(signed_petition = CIVPARTB_1,
         attend_political_meeting = CIVPARTB_2,
         worked_community_project = CIVPARTB_3,
         attend_protest = CIVPARTB_4) %>%
  # recode voting: 1 = voted for anyone, 0 = did not vote
  mutate(voted_in_2016 = dplyr::recode(
    vote_2016,
    "Donald Trump" = 1,
    "Hillary Clinton" = 1,
    "Someone else" = 1,
    "Did not vote" = 0,
    # If they report that they "don't recall" who they voted for,
    # We can't distinguish this from
    # a) they don't want to tell us who they voted for
    # or b) they don't remember if they voted at all.
    # As a result, we set this response to NA
    "Don't recall" = NA_real_,
    "Was not eligible" = NA_real_)) %>%
  # Recode the Yes/No activity items to 1/0; anything else becomes missing
  mutate(across(
    all_of(civic_activity_items),
    ~ case_when(.x == "Yes" ~ 1,
                .x == "No" ~ 0,
                TRUE ~ NA_real_))) %>%
  # Tally, per respondent, how many of the five items were answered
  # (vectorized; counted before rescaling)
  mutate(valid_responses = as.integer(
    rowSums(!is.na(across(all_of(civic_items)))))) %>%
  # Now rescale each item to 0-1
  mutate(across(all_of(civic_items), standardize)) %>%
  # If there are enough valid responses (at least 2),
  # calculate the civic participation index by averaging the answered items
  mutate(civic_participation_index = if_else(
    valid_responses >= 2,
    rowSums(across(all_of(civic_items)), na.rm = TRUE) / valid_responses,
    NA_real_))
```


### 8. Help from Neighbors

```{r, message = FALSE}

# Rename the ASSISTA / ASSISTB items and convert their labels to numbers.
dat <- dat %>%
  rename(
    neighbors_mutual_aid = ASSISTA,
    help_from_neighbors = ASSISTB_1,
    help_from_family = ASSISTB_2,
    help_from_local_govt = ASSISTB_3,
    help_from_natl_govt = ASSISTB_4
  ) %>%
  mutate(
    # Frequency scale: 0 ("Never") up to 3 ("Very often")
    neighbors_mutual_aid = dplyr::recode(
      neighbors_mutual_aid,
      "Never" = 0,
      "Not so often" = 1,
      "Somewhat often" = 2,
      "Very often" = 3
    ),
    # Agreement scale centred on zero: -2 ("Strongly disagree")
    # up to 2 ("Strongly agree")
    across(
      all_of(c("help_from_neighbors", "help_from_family",
               "help_from_local_govt", "help_from_natl_govt")),
      ~ dplyr::recode(
        .x,
        "Strongly agree"  = 2,
        "Somewhat agree" = 1,
        "Neither agree nor disagree" = 0,
        "Somewhat disagree" = -1,
        "Strongly disagree" = -2
      )
    )
  )
```

## 1.16 Demographics

```{r, message = FALSE}
# Give the demographic survey items lowercase, descriptive names.
dat <- rename(
  dat,
  gender = SEX,
  marriage = MARRIAGE,
  hispanic = HISP,
  education = EDUCATION,
  employment = EMPLOY,
  income = INCOME1,
  health_insurance = INSURE,
  immigrant_generation = IMMIGRANT,
  religion = RELIG1
)
# Note: the Race/Ethnicity question allowed multiple selections, and some
# respondents selected ALL of the identities. The four-category race variable
# supplied by Qualtrics is used instead.

```

## 1.17 Export
```{r, message=  FALSE, warning = FALSE}
# export: keep the main analysis variables, derive a few extra demographic
# categories, and write the working dataset to CSV
dat %>%
  # Keep just the main ones. For others, we can refer to the original data
  select(
    id,
    complete,
    zip, county,
    latitude = LocationLatitude, 
    longitude = LocationLongitude, 
    date,
    age, birthyear,
    gender, education, employment, marriage,
    health_insurance,
    race, multirace, hispanic,
    religion,  
    income, 
    immigrant_generation, 
    uses_socialmedia,facebook, googleplus, 
    twitter, linkedin,contains("block"),
    contains("days_"),
    # BMI
    contains("bmi"), contains("weight"), contains("height"), 
    # Health
    cesdr7,
    contains("condition"),DIAB_PREG,
    contains("health"), 
    contains("happiness"),
    # Change in
    contains("chg"),
    contains("activity"),
    contains("smok"), 
    contains("vote"), 
    contains("PARTY"), contains("party"), 
     ideology_5, 
    # Polarization measures
    contains("self"),
    contains("state"), 
    contains("us_"), 
    contains("word"),
    contains("partisan_gap"),
    affective_polarization_words,
    contains("same_"), contains("diff_"), 
    contains("favor"), 
    contains("diff"),
    ifmarried_rep, ifmarried_dem,
    # Social Capital Indices
    contains("index"),
    contains("trust"),
    contains("friend"), contains("mutual"), 
    worked_community_project,signed_petition,
    contains("help"), 
    contains("attend")) %>%
  # Fix any NaNs or Infinites (e.g. from dividing by zero valid responses)
  mutate(across(
    c(social_trust_index,
      informal_social_interactions_index,
      organized_group_interactions_index,
      formal_group_involvement_index,
      diversity_friendship_network_index,
      composite_racial_group_trust_index,
      civic_participation_index),
    ~ if_else(is.nan(.x) | is.infinite(.x), NA_real_, .x))) %>%

  # Let's create a few extra categories too
  mutate(
    # Two-category labor force status (uses the original employment labels,
    # so it must be computed before employment is recoded below)
    labor_force = employment %>% dplyr::recode(
      "Employed for wages" = "in labor force",
      "Self-employed" = "in labor force",
      "Out of work for less than 1 year" = "in labor force",

      "A homemaker" = "not in labor force",
      "Retired" = "not in labor force",
      "Unable to work" = "not in labor force",
      "A student" = "not in labor force",
      "Out of work for 1 year or more" = "not in labor force"),

    # Poverty threshold is ~ 13,300 for under age 65
    # ~12261 for over 65
    # The lowest category we asked was less than 10,000 and 10,000 - 19,999
    # So, let's just classify under 10,000 as poverty
    poverty = if_else(
      !is.na(income) &
        income == "Less than $10,000",
      "Yes", "No"),
    # Did they complete a bachelor's degree?
    bachelor = if_else(
      !is.na(education) &
        education == "College 4 years or more (College graduate)",
      "Yes", "No"),
    # Do they identify as female? (missing gender counts as "No")
    # Computed once here as a factor; it used to be computed twice.
    female = if_else(!is.na(gender) &
                       gender == "Female", "Yes", "No") %>%
      factor() %>%
      relevel(ref = "No"),
    # Do they report being uninsured? (missing counts as "No")
    # Computed once here as a factor; it used to be computed twice.
    uninsured = if_else(!is.na(health_insurance) &
                          health_insurance == "No", "Yes", "No") %>%
      factor() %>%
      relevel(ref = "No"),
    # Reclassify anyone who indicated that they are a member of an unmarried couple as never married.
    # This helps us with weighting
    marriage2 = if_else(!is.na(marriage) &
                          marriage == "A member of an unmarried couple",
                        "Never married", marriage)
  ) %>%

  mutate(
    # Never married, based on the ORIGINAL marriage variable (not marriage2)
    nevermarried = if_else(!is.na(marriage) & marriage == "Never married", "yes", "no") %>%
      factor() %>%
      relevel(ref = "no"),
    # Three-category employment status, replacing the original labels
    employment = employment %>% dplyr::recode(
      "Employed for wages" = "employed",
      "Self-employed" = "employed",
      "Out of work for less than 1 year" = "unemployed",
      "A homemaker" = "not in labor force",
      "Retired" = "not in labor force",
      "Unable to work" = "not in labor force",
      "A student" = "not in labor force",
      "Out of work for 1 year or more" = "not in labor force") %>%
      factor() %>%
      relevel(ref = "employed"),
    # Did they complete at least some college (including a 4-year degree)?
    somecollege = if_else(
      !is.na(education) &
        education %in% c("College 4 years or more (College graduate)",
                         "College 1 year to 3 years (Some college or technical school)"),
      "Yes", "No") %>%
      factor() %>%
      relevel(ref = "No"),
    # Turn income into a numeric (ordinal, 1-11) variable
    income2 = income %>% dplyr::recode(
      "Less than $10,000" = "1",
      "$10,000 - $19,000" = "2",
      # NOTE(review): the poverty comment above describes this bracket as
      # "10,000 - 19,999"; accept that spelling too so the bracket is not
      # silently turned into NA if that is the actual survey label - confirm.
      "$10,000 - $19,999" = "2",
      "$20,000 - $29,999" = "3",
      "$30,000 - $49,999" = "4",
      "$50,000 - $69,999" = "5",
      "$70,000 - $99,999" = "6",
      "$100,000 - $124,999" = "7",
      "$125,000 - $149,999" = "8",
      "$150,000 - $199,999" = "9",
      "$200,000 - $249,999" = "10",
      "$250,000 or more" = "11",
      "Prefer not to say" = NA_character_) %>% as.numeric(),
    # recode race as a factor with "white" as the reference level
    race = race %>% factor() %>% relevel(ref = "white"),
    # recode religion as a factor with "Protestant" as the reference level
    religion = religion %>% factor() %>% relevel(ref = "Protestant")) %>%

  # Convert the self-vs-voter difference scores into similarity scores
  # (10 minus the difference)
  mutate(sim_self_us_voter_avg = 10 - diff_self_us_voter_avg,
         sim_self_state_voter_avg = 10 - diff_self_state_voter_avg) %>%


  write_csv("survey_builder/survey_dataset.csv")

# Drop the working data frame from the environment now that it is saved
remove(dat)
```

# 2. County Data

## 2.0. Load Packages

```{r, message = FALSE, warning = FALSE}
library(tidyverse)
library("dplyr")
library("tidyr")
library("haven")
select <- dplyr::select
```

## 2.1. Social Capital

Second, access county Social Capital Indices.

```{r}
# Read the county social capital file, keep the FIPS identifier
# (left-padded with zeros to five characters) plus the four index columns,
# and save the result as a CSV.
haven::read_dta("county_builder/SoCI_09262019.dta") %>%
  transmute(
    fips = str_pad(fips_n, width = 5, side = "left", pad = "0"),
    bonding,
    bridging,
    linking,
    socialcap
  ) %>%
  write_csv("county_builder/indices.csv")
```

## 2.2 Health Data

Third, download current County Health Rankings Data.

```{r}
# Use this website
# https://www.countyhealthrankings.org/explore-health-rankings/rankings-data-documentation

read_csv("county_builder/chr_2020.csv") %>%
  # remove anything other than the raw measures
  select(!matches("CI low|CI high|numerator|denominator")) %>%
  # remove extra variable names
  slice(-1) %>%
  # Remove state or country level summaries
  filter(`County FIPS Code` != "000") %>%
  # Make all variables numeric, except identifiers
  mutate_at(vars(-c(1:7)), (funs(. %>% as.numeric))) %>%
  # select variables
  select(state = `State Abbreviation`, fips = `5-digit FIPS Code`, county = Name,
         # Documentation available here:
         # https://www.countyhealthrankings.org/explore-health-rankings/measures-data-sources/2020-measures
         
         # Years of potential life lost before age 75 per 100,000 population (age-adjusted). (2016-2018)
         premature_death = `Premature death raw value`,  
         premature_death_black = `Premature death (Black)`,
         premature_death_hispanic = `Premature death (Hispanic)`,
         premature_death_white = `Premature death (White)`,
         # Percentage of adults reporting fair or poor health (age-adjusted) 2017
         poor_fair_health = `Poor or fair health raw value`, 
         # Average number of physically unhealthy days reported in past 30 days (age-adjusted). 2017
         days_poor_physical_health = `Poor physical health days raw value`,
         # Average number of mentally unhealthy days reported in past 30 days (age-adjusted) 2017
         days_poor_mental_health = `Poor mental health days raw value`,
         # Percentage of adults who are current smokers. 2017
         smoking = `Adult smoking raw value`, 
         # Percentage of the adult population (age 20 and older) 
         # that reports a body mass index (BMI) greater than or equal to 30 kg/m2. (2016)
         obesity = `Adult obesity raw value`,
         # Percentage of adults aged 20 and above with diagnosed diabetes. (2016)
         diabetes = `Diabetes prevalence raw value`, 
         # Index of factors that contribute to a healthy food environment,
         # from 0 (worst) to 10 (best). (2015-2017)
         food_env_index = `Food environment index raw value`,
         # Percentage of adults age 20 and over reporting no leisure-time physical activity. (2016)
         physical_inactivity = `Physical inactivity raw value`,
         # Percentage of population with adequate access to locations for physical activity. (2010, 2019)
         exercise_access = `Access to exercise opportunities raw value`,
         # Percentage of adults reporting binge or heavy drinking (2017)
         drinking_excessive = `Excessive drinking raw value`, 
         # Percentage of driving deaths with alcohol involvement. (2014-2018)
         alcohol_driving_deaths = `Alcohol-impaired driving deaths raw value`,
         # Number of newly diagnosed chlamydia cases per 100,000 population. (2017) 
         sexually_transmitted_infections = `Sexually transmitted infections raw value`,
         # Percentage of population under age 65 without health insurance. (2017)
         uninsured = `Uninsured raw value`,
         #  rate of number of primary care providers/100,000 population (2017)
         primary_care_physicians = `Primary care physicians raw value`,
         #  rate of number of dentistry providers/100,000 population (2018)
         dentists = `Dentists raw value`,
         #  rate of number of mental health providers/100,000 population (2019)
         mental_health_providers = `Mental health providers raw value`,
         # Rate of hospital stays for ambulatory-care sensitive conditions per 100,000 Medicare enrollees. (2017)
         prevent_hospital_stays = `Preventable hospital stays raw value`,
         # Rate of hospital stays for ambulatory-care sensitive conditions per 100,000 Medicare enrollees. (2017)
         prevent_hospital_stays_white = `Preventable hospital stays (White)`,
         # Rate of hospital stays for ambulatory-care sensitive conditions per 100,000 Medicare enrollees. (2017)
         prevent_hospital_stays_black = `Preventable hospital stays (Black)`,
        # `Percentage of fee-for-service (FFS) Medicare enrollees that had an annual flu vaccination.`
        flu_vaccinations = `Flu vaccinations raw value`,
        # Percentage of adults ages 25-44 with some post-secondary education. (2014-2018)
        some_college = `Some college raw value`,
        # Percentage of population ages 16 and older unemployed but seeking work. (2018)
        unemployment = `Unemployment raw value`,
        # Ratio of household income at the 80th percentile to income at the 20th percentile (2014-2018)
        income_inequality = `Income inequality raw value`,
        # Number of membership associations per 10,000 population. (2017)
        social_associations = `Social associations raw value`,
        # Number of reported violent crime offenses per 100,000 population. (2014-2016)
        violent_crime = `Violent crime raw value`,
        # `Number of deaths due to injury per 100,000 population.` (2014-2018)
        injury_deaths = `Injury deaths raw value`,
        # Average daily density of fine particulate matter in micrograms per cubic meter (PM2.5). (2014)
        air_pollution = `Air pollution - particulate matter raw value`,
        # Average number of years a person can expect to live. (2016-2018)
        life_expectancy = `Life expectancy raw value`,
        life_expectancy_black = `Life expectancy (Black)`,
        life_expectancy_white = `Life expectancy (White)`,
        life_expectancy_hisp = `Life expectancy (Hispanic)`,
        life_expectancy_asian = `Life expectancy (Asian/Pacific Islander)`,
        # Number of deaths among residents under age 75 per 100,000 population (age-adjusted). (2016-2018)
        premature_age_adjusted_mortality = `Premature age-adjusted mortality raw value`,
        
        # Percentage of adults reporting 14 or more days of poor physical health per month. (2017)
        frequent_phys_distress = `Frequent physical distress raw value`,
        # Percentage of adults reporting 14 or more days of poor mental health per month. (2017)
        frequent_ment_distress = `Frequent mental distress raw value`,
        # Number of people aged 13 years and older living with a diagnosis of
        # human immunodeficiency virus (HIV) infection per 100,000 population. (2016)
        hiv = `HIV prevalence raw value`,
        # `Number of drug poisoning deaths per 100,000 population. (2016-2018)
        drug_overdose_deaths = `Drug overdose deaths raw value`,
        # The income where half of households in a county earn more and half of households earn less. (2018)
        median_household_income = `Median household income raw value`,
        # Index of dissimilarity where higher values indicate greater residential
        # segregation between Black and White county residents. (2014-2018)
        residential_segregation_black_white = `Residential segregation - Black/White raw value`,
        # Index of dissimilarity where higher values indicate greater residential 
        # segregation between non-White and White county residents. (2014-2018)
        residential_segregation_nonwhite_white = `Residential segregation - non-White/White raw value`,
        # Percentage of occupied housing units that are owned. (2014-2018)
        homeownership = `Homeownership raw value`,
        # The following demographic variables all come from the American Community Survey 2014-2018
        #pop = `Population raw value`,
        #pop_age_under_18 = `% below 18 years of age raw value`,
        #pop_age_65_plus = `% 65 and older raw value`,
        #pop_black = `% Non-Hispanic Black raw value`,
        #pop_nativeam = `% American Indian & Alaska Native raw value`,
        #pop_asian = `% Asian raw value`,
        #pop_pacific = `% Native Hawaiian/Other Pacific Islander raw value`,
        #pop_hisp = `% Hispanic raw value`,
        #pop_white = `% Non-Hispanic White raw value`, 
        pop_non_english_speaker = `% not proficient in English raw value`, # 2014-2018
        #pop_female = `% Females raw value`,
        #pop_rural = `% Rural raw value`
        ) %>%
  write_csv("county_builder/chr.csv")

```

## 2.3. Election Data

```{r}
# County-level presidential returns, 2000-2016 (Harvard Dataverse):
# https://dataverse.harvard.edu/dataset.xhtml?persistentId=doi:10.7910/DVN/VOQCHQ

read_csv("county_builder/countypres_2000-2016.csv") %>%
  # Keep the Democratic and Republican rows of the five presidential contests
  filter(office == "President" &
           party %in% c("democrat", "republican") &
           year %in% seq(from = 2000, to = 2016, by = 4)) %>%
  # Keep the columns we need, zero-padding the county code to five characters
  transmute(year,
            fips = str_pad(FIPS, width = 5, side = "left", pad = "0"),
            party, candidatevotes, totalvotes) %>%
  # Vote share per county-year-party: summed candidate votes over the
  # (mean) total votes recorded for that group
  group_by(year, fips, party) %>%
  summarise(voteshare = sum(candidatevotes, na.rm = TRUE) / mean(totalvotes, na.rm = TRUE)) %>%
  ungroup() %>%
  # One row per county, one column per party-year (e.g. democrat_2016)
  pivot_wider(id_cols = fips,
              names_from = c(party, year),
              values_from = voteshare) %>%
  # Write the wide table to disk
  saveRDS(file = "county_builder/elections.rds")
  
```

## 2.4. Demographics

```{r}
# Get most recent variables
library(tigris)
library("tidycensus")
library("censusapi")

# SECURITY: do not hard-code the Census API key in the replication script.
# tidycensus reads the key from the CENSUS_API_KEY environment variable; store
# it once with census_api_key("<your key>", install = TRUE) or in ~/.Renviron.
# Any key previously committed to this file should be treated as compromised.
if (!nzchar(Sys.getenv("CENSUS_API_KEY"))) {
  stop("CENSUS_API_KEY is not set. Request a key at ",
       "https://api.census.gov/data/key_signup.html and store it in ~/.Renviron.",
       call. = FALSE)
}

# Extract 5-year estimates for 2013-2017, since 2017 was our outcome variable year
get_acs(
  year = 2017,
  geography = "county",
  variables = c(
    "B01003_001", "B01001_026", "B01002_001", "B02001_002",
    "B06008_002", "B06008_001",
    "B02001_003", "B02001_004", "B02001_005", "B02001_006",
    "B03001_003", "B06009_004", "B19083_001", "B23025_003",
    "B23025_005", "B19013_001", "B25105_001", "B08128_006",
    "B08128_007", "B08128_008"),
  survey = "acs5",
  key = Sys.getenv("CENSUS_API_KEY")) %>%
  # Keep only the county code, the variable code, and the point estimate
  select(-moe, -NAME) %>%
  rename(fips = GEOID) %>%
  # Replace ACS table codes with readable variable names
  mutate(variable = variable %>% dplyr::recode(
    "B01003_001" = "pop", # Total Population
    "B01001_026" = "pop_female", # Women
    "B01002_001" = "median_age",
    "B06008_001" = "pop_over_15", # Population over age 15
    "B06008_002" = "pop_never_married", # Population over age 15 that never married
    "B02001_002" = "pop_white",
    "B02001_003" = "pop_black", # Estimate!!Total!!Black or African American alone
    "B02001_004" = "pop_natam", # Estimate!!Total!!American Indian and Alaska Native alone
    "B02001_005" = "pop_asian", # Estimate!!Total!!Asian alone
    "B02001_006" = "pop_pacific", # Estimate!!Total!!Native Hawaiian and Other Pacific Islander alone
    "B03001_003" = "pop_hisplat", # Hispanic or Latino
    "B06009_004" = "pop_some_college",
    "B19083_001" = "gini", # Income inequality: Estimate!!Gini Index
    "B23025_003" = "pop_labor_force", # Estimate!!Total!!In labor force!!Civilian labor force
    "B23025_005" = "pop_unemployed", # Estimate!!Total!!In labor force!!Civilian labor force!!Unemployed
    "B19013_001" = "median_income", # Estimate!!Median Household income (dollars)
    "B25105_001" = "median_monthly_housing_cost",
    "B08128_006" = "employees_muni", # Local government linkage: Estimate!!Total!!Local government workers
    "B08128_007" = "employees_state", # State government linkage: Estimate!!Total!!State government workers
    "B08128_008" = "employees_fed" # Federal government linkage: Estimate!!Total!!Federal government workers
  )) %>%
  # One row per county, one column per variable
  pivot_wider(
    id_cols = fips,
    names_from = variable,
    values_from = estimate) %>%
  # Convert raw counts to rates:
  # - shares of the relevant population (proportions between 0 and 1)
  # - unemployment per 1,000 members of the civilian labor force
  # - government employees per 1,000 residents
  mutate(
    pop_never_married = pop_never_married / pop_over_15,
    pop_female = pop_female / pop,
    pop_white = pop_white / pop,
    pop_black = pop_black / pop,
    pop_asian = pop_asian / pop,
    pop_natam = pop_natam / pop,
    pop_pacific = pop_pacific / pop,
    pop_hisplat = pop_hisplat / pop,
    pop_some_college = pop_some_college / pop,
    pop_unemployed = pop_unemployed / pop_labor_force * 1000,
    employees_fed = employees_fed / pop * 1000,
    employees_state = employees_state / pop * 1000,
    employees_muni = employees_muni / pop * 1000) %>%
  saveRDS("county_builder/demographics.rds")
```


## 2.5. Religion

```{r, message = FALSE, warning = FALSE}
# Download 2010 religious census
# http://www.thearda.com/Archive/Files/Downloads/RCMSCY10_DL2.asp
readxl::read_excel("county_builder/U.S. Religion Census Religious Congregations and Membership Study, 2010 (County File).XLSX") %>%
  # Grab the county code, population in 2010,
  # total number of religious adherents,
  # and all tallies of adherents
  select(fips = FIPS, pop = POP2010, total_adh = TOTADH,
         evangelical = "EVANADH", # Evangelical Protestant
         black_protestant = "BPRTADH", # Black Protestant
         mainline_protestant = "MPRTADH", # Mainline Protestant
         catholic = "CATHADH", # Catholic
         orthodox = "ORTHADH", # Orthodox
         other = "OTHADH", # Other religious adherents
         # Including...
         muslim = "MSLMADH", # Muslim
         conservative_judaism = "CJUDADH", # Conservative Judaism
         reconstructionist_judaism = "RJUDADH", # Reconstructionist Judaism
         reformed_judaism = "RFRMADH", # Reform Judaism
         orthodox_judaism = "OJUDADH") %>% # Orthodox Judaism
  # Fix fips code: zero-pad to five characters
  mutate(fips = str_pad(fips, 5, "left", "0")) %>%
  # If a cell is blank, that means no persons of that type were found. Fill in with zero.
  # (A formula lambda replaces funs(), which is deprecated since dplyr 0.8.0.)
  mutate_at(vars(-c(fips, pop)), ~ if_else(!is.na(.), as.numeric(.), 0)) %>%
  # We're going to calculate the following categories,
  # expressed as the number of adherents per 1000 residents
  mutate(
    total_adherents = total_adh / pop * 1000,
    protestant = (evangelical + black_protestant + mainline_protestant) / pop * 1000,
    catholic = catholic / pop * 1000,
    orthodox = orthodox / pop * 1000,
    # "Other" net of the Muslim and Jewish tallies, which are reported separately below.
    # This must be computed before `muslim` is overwritten with its per-1000 rate.
    other_adherents = (other - muslim - conservative_judaism - reformed_judaism -
                         orthodox_judaism - reconstructionist_judaism) / pop * 1000,
    muslim = muslim / pop * 1000,
    jewish = (conservative_judaism + reformed_judaism +
                orthodox_judaism + reconstructionist_judaism) / pop * 1000) %>%
  # Now select just the main results
  select(fips, total_adherents, protestant, catholic, orthodox, muslim, jewish, other_adherents) %>%
  saveRDS("county_builder/religious_affiliation.rds")
```

## 2.6  Population Density

```{r}

# Gather US Equal Area Projection
# https://spatialreference.org/ref/esri/usa-contiguous-albers-equal-area-conic/
# NOTE(review): the st_* functions below are called unqualified, so this chunk
# assumes the sf package is already attached earlier in the notebook - confirm.
usea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# Download the shapefile of urbanized areas in the US
# and reproject it to the equal-area projection defined above
myurban <- tigris::urban_areas(cb = TRUE, year= 2019) %>%
  st_as_sf() %>%
  st_transform(crs = usea)


# Download county shapefiles from tigris, reproject them,
# keep only the county code, name, and geometry, and cache them on disk
tigris::counties(year = 2018, cb = TRUE) %>%
  st_as_sf() %>%
  st_transform(crs = 4326) %>%
  st_transform(crs = usea) %>%
  dplyr::select(fips = GEOID, county = NAME, geometry) %>%
  saveRDS("shapes/counties_bounds.rds")

# Grab county shapes
read_rds("shapes/counties_bounds.rds") %>%
  # Spatially join in the urban areas that overlap each county
  st_join(myurban) %>%
  # For each county, collapse the joined rows back to one row and list the
  # distinct urban-area types (UATYP10) found there, e.g. "C", "U" or "C,U".
  # Counties with no match yield an empty string.
  group_by(fips, county_name = county) %>%
  summarize(geometry = st_union(geometry),
            urban = paste(unique(UATYP10) %>% sort(), collapse = ",")) %>%
  ungroup() %>%
  # Turn the empty string into NA, then label each county:
  # any "U" wins over "C"; counties with no urban area become "rural"
  mutate(urban = na_if(x = urban, y = "") %>%
           recode_factor("U" = "urbanized_area", 
                         "C,U" = "urbanized_area",
                         "C" = "urban_cluster",
                         .missing = "rural")) %>%
  saveRDS("county_builder/pop_density.rds")

# Counties are classified as "urbanized_area" (U) if the census designates at least one densely developed area with a population of 50,000 or more in the county, and as "urban_cluster" (C) if the census designates at least one densely developed area with a population of 2,500 to 50,000 in the county, but no urbanized areas. Finally, counties are classified as "rural" if none of the above applies. Respondents later inherit the classification of their county.

# The urban-area shapes are no longer needed; free the memory
remove(myurban)
```

## 2.6. Combine

```{r, message = FALSE, warning = FALSE}
# Identify the County Health Rankings measures that are observed (non-missing)
# in at least 95% of counties, i.e. missing for no more than 5%.
# `available` ends up as a character vector of measure (column) names.
available <- read_csv("county_builder/chr.csv") %>%
  pivot_longer(
    cols = -c(state, fips, county),
    names_to = "measure",
    values_to = "value") %>%
  group_by(measure) %>%
  # Share of counties with a non-missing value for each measure
  summarize(available = sum(!is.na(value)) / n()) %>%
  filter(available >= 0.95) %>%
  pull(measure)

# Now keep just those variables and attach every other county-level source.
# all_of() makes explicit that `available` is an external vector of column
# names rather than a column of the data; passing it bare is ambiguous and
# deprecated by tidyselect. It also errors if an expected column is absent.
# NOTE(review): pop_density.rds is saved with its geometry column; confirm that
# write_csv() serialises (or should drop) that column as intended.
read_csv("county_builder/chr.csv") %>%
  select(state, fips, county, all_of(available)) %>%
  left_join(y = read_csv("county_builder/indices.csv"), by = "fips") %>%
  left_join(by = "fips", y = read_rds("county_builder/demographics.rds")) %>%
  left_join(by = "fips", y = read_rds("county_builder/elections.rds")) %>%
  left_join(by = "fips", y = read_rds("county_builder/religious_affiliation.rds")) %>%
  left_join(by = "fips", y = read_rds("county_builder/pop_density.rds")) %>%
  write_csv("county_dataset.csv")
```



## 2.7. Aggregate Polarization

Measure aggregate level polarization.

```{r, message = FALSE, warning = FALSE}
# Load geospatial packages
library(tidyverse)
library(sf) # for GIS data manipulation
library(rgdal)
library(tigris) # for obtaining census shapefiles

# Gather US Equal Area Projection
# https://spatialreference.org/ref/esri/usa-contiguous-albers-equal-area-conic/
usea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"


# Download TIGRIS zipcode shapes and get centroids
# (reprojected to the equal-area projection above, then cached on disk)
tigris::zctas(year = 2018, cb = TRUE) %>%
  st_as_sf() %>%
  st_transform(crs = 4326) %>%
  st_transform(crs = usea) %>%
  # Get centroid of zipcode
  st_centroid() %>%
  saveRDS("shapes/zip_bounds.rds")

# Make sure the unqualified select() used below refers to dplyr's version
select <- dplyr::select

# Build a zip code -> county FIPS crosswalk from the zip-code centroids
read_rds("shapes/zip_bounds.rds") %>%
  select(zip = ZCTA5CE10, geometry) %>%
  # Attach respondent ids by zero-padded zip code. The ids are dropped again
  # below, so this only repeats zip rows, which are then de-duplicated.
  left_join(
    read_csv("survey_builder/survey_dataset.csv") %>%
      transmute(id, zip = str_pad(zip, width = 5, side = "left", pad = "0")),
    by = "zip") %>%
  # Spatially join in the county that contains each zip-code centroid
  st_join(read_rds("shapes/counties_bounds.rds")) %>%
  # Drop the spatial class, then keep one row per zip-fips pair
  as.data.frame() %>%
  distinct(zip, fips) %>%
  saveRDS("shapes/conversion.rds")
  

# Get fips codes from the longitude/latitude recorded for each respondent
# (presumably the IP-based location reported by the survey platform - confirm).
read_csv("survey_builder/survey_dataset.csv") %>%
  select(id, longitude, latitude) %>%
  # st_as_sf() stops with an error when a coordinate is missing, so drop
  # respondents without a recorded location. They simply receive no IP-based
  # fips code when this table is joined back onto the survey by id.
  filter(!is.na(longitude), !is.na(latitude)) %>%
  st_as_sf(coords = c("longitude", "latitude"), crs = 4326) %>%
  st_transform(crs = usea) %>%
  # Spatially join in the ID of the county in which each respondent lives
  st_join(read_rds("shapes/counties_bounds.rds")) %>%
  as.data.frame() %>%
  select(id, fips) %>%
  saveRDS("shapes/conversion_ip.rds")


# Assemble the respondent-level dataset with county context
read_csv("survey_builder/survey_dataset.csv") %>%
  # County code located from the (zero-padded) zip code
  mutate(zip = str_pad(zip, width = 5, side = "left", pad = "0")) %>%
  left_join(read_rds("shapes/conversion.rds"), by = "zip") %>%
  # County code located from the respondent's recorded coordinates
  left_join(read_rds("shapes/conversion_ip.rds") %>% rename(fips_ip = fips),
            by = "id") %>%
  # Prefer the zip-based county; fall back on the coordinate-based one
  # (22 provided zipcodes could not be geo-located to a fips code)
  mutate(fips = coalesce(fips, fips_ip)) %>%
  # Attach the traits of each respondent's county
  left_join(
    read_csv("county_dataset.csv") %>%
      dplyr::select(fips, bonding, bridging, linking, socialcap,
                    democrat_2016, republican_2016, urban),
    by = "fips") %>%
  mutate(
    # Party that received the larger 2016 vote share in the county
    winner = if_else(democrat_2016 > republican_2016,
                     "Democrat", "Republican", NA_character_),
    # Collapse the 7-point party scale into three categories
    party = dplyr::recode(
      as.character(party_7),
      "1" = "Democrat",
      "2" = "Democrat",
      "3" = "Democrat",
      "4" = "Independent",
      "5" = "Republican",
      "6" = "Republican",
      "7" = "Republican"),
    # "same" when the respondent's party carried their county, "different"
    # otherwise. Independents never match a winner, so they always count
    # as "different".
    polarized_aggregate = if_else(party == winner,
                                  "same", "different", NA_character_)) %>%
  # The helper columns are not part of the final dataset
  dplyr::select(-party, -winner) %>%
  saveRDS("dataset.rds")

# Clear the workspace before the next section
rm(list= ls())
```




# 3. Descriptives

## Map

```{r, eval = FALSE}
library(tidyverse)
library(sf)
library(rgdal)

# Gather US Equal Area Projection
# https://spatialreference.org/ref/esri/usa-contiguous-albers-equal-area-conic/
usea <- "+proj=aea +lat_1=29.5 +lat_2=45.5 +lat_0=37.5 +lon_0=-96 +x_0=0 +y_0=0 +ellps=GRS80 +datum=NAD83 +units=m +no_defs"

# Get county centroids (of the largest polygon, for multi-part counties)
points <- read_rds("shapes/counties_bounds.rds") %>%
  group_by(fips) %>%
  summarize(geometry = st_centroid(geometry, of_largest_polygon = TRUE)) %>%
  ungroup()

# Save as a separate step: saveRDS() returns NULL, so piping into it at the
# end of the assignment above would leave `points` set to NULL.
saveRDS(points, "shapes/counties_centroids.rds")
```

```{r}
library(tidyverse)
library(sf)
library(rgdal)

#tigris::fips_codes %>%
#  select(state, state_code) %>%
#  distinct()

# County polygons for the map. Drop Alaska (02), Hawaii (15), and the
# territory state codes (60, 66, 69, 72, 74, 78).
county <- read_rds("shapes/counties_bounds.rds") %>%
  filter(!(str_sub(fips, 1, 2) %in%
             c("02", "15", "60", "66", "69", "72", "74", "78")))

# One point per respondent, placed at the centroid of their county
points <- read_rds("shapes/counties_centroids.rds") %>%
  left_join(read_rds("dataset.rds"), by = "fips") %>%
  # Same geographic exclusions as above, and only rows with a polarization score
  filter(!(str_sub(fips, 1, 2) %in%
             c("02", "15", "60", "66", "69", "72", "74", "78")),
         !is.na(diff_self_us_voter_avg)) %>%
  # Attach the county name
  left_join(
    county %>%
      as.data.frame() %>%
      select(fips, county_name = county) %>%
      distinct(),
    by = "fips")

# State outlines: dissolve counties that share the same two-digit state code
state <- county %>%
  group_by(state = str_sub(fips, 1, 2)) %>%
  summarize(geometry = st_union(geometry)) %>%
  ungroup()

# Visualize
ggplot() +
  # Get county borders
  geom_sf(data = county, fill = NA, color = "lightgrey", size = 0.5) +
  # Plot people using county points, sized by perceived polarization
  geom_sf(data = points, mapping = aes(size = diff_self_us_voter_avg), alpha = 0.25) +
  # State borders on top
  geom_sf(data = state, color = "black", fill = NA) +
  theme_void(base_size = 14) +
  labs(size = "Perceived\nPolarization\nvs.\nAverage\nUS\nVoter") +
  ggspatial::annotation_north_arrow(height = unit(0.5, "cm"), width = unit(0.5, "cm")) +
  ggspatial::annotation_scale(width = unit(0.3, "cm"), pad_x = unit(1, "cm"))

# Save the map built above. ggsave() must be its own statement: adding it to
# the plot with `+` is an error in ggplot2 >= 3.3.4.
ggsave("viz/figure_map.png", plot = last_plot(), dpi = 500, width = 8, height = 4.5)

# Number of respondents per state
read_rds("dataset.rds") %>%
  group_by(state) %>%
  count()

# Count respondents per county, largest counties first.
# Returns one row per (fips, county_name) pair with the respondent count `n`.
# The summaries below all derive from this table, replacing five copies of
# the same pipeline.
# NOTE(review): respondents whose fips is missing form their own (NA) group and
# are therefore counted as one "county" - confirm whether that is intended.
count_respondents_by_county <- function(path = "dataset.rds",
                                        bounds = "shapes/counties_bounds.rds") {
  county_names <- read_rds(bounds) %>%
    as.data.frame() %>%
    select(fips, county_name = county) %>%
    distinct()

  read_rds(path) %>%
    left_join(county_names, by = "fips") %>%
    select(id, fips, county_name) %>%
    group_by(fips, county_name) %>%
    count() %>%
    ungroup() %>%
    arrange(desc(n))
}

respondents_by_county <- count_respondents_by_county()

# How many counties have any respondents (first element = number of counties)
dim(respondents_by_county)

# How many counties have multiple respondents
respondents_by_county %>%
  filter(n > 1) %>%
  dim()

# Counties with just one respondent
mycounties <- respondents_by_county %>%
  filter(n == 1) %>%
  pull(fips)

# Respondents who are the only respondents in their county
read_rds("dataset.rds") %>%
  filter(fips %in% mycounties) %>%
  count()

# Top counties
respondents_by_county %>%
  head()
```

```{r}
# Distribution of the number of respondents per county
read_rds("dataset.rds") %>%
  # Attach county names
  left_join(by = "fips", y = read_rds("shapes/counties_bounds.rds") %>%
              as.data.frame() %>%
              select(fips, county_name = county) %>%
              distinct()) %>%
  select(id, fips, county_name) %>%
  # One row per county with its respondent count `n`
  group_by(fips, county_name) %>%
  count() %>%
  ggplot(mapping = aes(x = n)) +
  geom_histogram(fill = "darkgrey", color = "black", bins = 30) +
  # The previous label claimed a log-transformed x-axis (and had an unbalanced
  # parenthesis), but no log scale is applied. Add scale_x_log10() and restore
  # the wording if a log axis is actually wanted.
  labs(x = "# of Respondents per County",
       y = "# of Counties")
```



```{r}
# Clear every object from the workspace before the next section
rm(list = ls())

```

## Representativeness

```{r}
library(tidyverse)
# Keep strings as character (not factor) in the data.frame() calls below
options(stringsAsFactors = FALSE)

# Respondent-level survey dataset
dat <- read_rds("dataset.rds")

# We're going to use population level statistics 
# from the Census QuickFacts Site, as well as a few others,
# to record the percentage of the population that is X group for each of several different classifications.

# Quick Facts:
#https://www.census.gov/quickfacts/fact/table/US/PST045219
#https://www.cdc.gov/tobacco/data_statistics/fact_sheets/adult_data/cig_smoking/index.htm#:~:text=In%202019%2C%20nearly%2014%20of,with%20a%20smoking%2Drelated%20disease.

# https://www.census.gov/data/tables/2018/demo/families/cps-2018.html
# https://www2.census.gov/programs-surveys/demo/tables/families/2018/cps-2018/taba1-all.xls

# Population benchmarks in long format: one row per (type, group) with the
# population share `freq` expressed as a proportion between 0 and 1.
pop <- bind_rows(
  data.frame(
    group = c("Yes", "No"),
    freq = c(0.492, 0.508),
    type = "Female"),
  
  data.frame(
    group = c("White", "Black", "Hispanic", "Asian", "Other"),
    freq = c(.604, .134, .183, .059, 0.02),
    type = "Race (Qualtrics)"),
  
  data.frame(
    group = c("Yes", "No"),
    freq =  c(0.629, .371),
    type = "In Labor Force"),
  
  data.frame(
    group = c("Yes", "No"),
    freq =  c(.10, .90),
    type = "Uninsured"),
  
  data.frame(
    group = c("Yes", "No"),
    freq = c(.118, .882),
    type = "Income Under $10,000"),
  
  data.frame(
    group = c("Yes", "No"),
    freq =  c(.315, .685),
    type = "Completed Bachelor's Degree"),
  
  data.frame(
    group = c("Yes", "No"),
    freq = c(.14, 0.86),
    type = "At Risk from Smoking"),
  
  data.frame(
    group = c("Married","Never married","Divorced",
               #"A member of an unmarried couple", (added to never married)
               "Separated","Widowed"),
  # NOTE(review): the comment above says the unmarried-couple share is added to
  # "Never married", but the extra 0.015 term is added to the first entry
  # ("Married") - confirm which category it belongs to.
  freq = c((0.486+0.015), 0.323, 0.099, 0.019, 0.058),
  type = "Marital Status"))

# Compare the population benchmarks in `pop` with the survey sample
compare <- dat %>%
  # Respondent traits to benchmark
  select(id, female, race, poverty, bachelor,
         labor_force, marriage2, risk_from_smoking, uninsured) %>%
  # Long format: one row per respondent-trait pair
  pivot_longer(cols = -id, names_to = "type", values_to = "group") %>%
  # Number of respondents in each category of each trait
  count(type, group, name = "freq") %>%
  # Convert the counts into within-trait shares
  group_by(type) %>%
  mutate(freq = round(freq / sum(freq, na.rm = TRUE), 3)) %>%
  ungroup() %>%
  # Stack the population benchmarks underneath;
  # `sample` is "1" for survey rows and "2" for population rows
  bind_rows(pop, .id = "sample") %>%
  mutate(
    sample = recode_factor(
      sample,
      "2" = "US Population",
      "1" = "Survey Sample"),
    # Give survey variables the same labels as the benchmark table
    type = dplyr::recode_factor(
      type,
      "bachelor" = "Completed Bachelor's Degree",
      "female" = "Female",
      "race" = "Race (Qualtrics)",
      "labor_force" = "In Labor Force",
      "poverty" = "Income Under $10,000",
      "marriage2" = "Marital Status",
      "risk_from_smoking" = "At Risk from Smoking",
      "uninsured" = "Uninsured"),
    # Harmonise category labels between survey and benchmarks
    group = dplyr::recode(
      group,
      "aa" = "Black",
      "white" = "White",
      "hispanic" = "Hispanic",
      "asian" = "Asian",
      "other" = "Other",
      "yes" = "Yes",
      "no" = "No",
      "in labor force" = "Yes",
      "not in labor force" = "No")) %>%
  # Numeric panel order that follows the factor levels of `type`
  mutate(myorder = as.numeric(type))

# Dodged bar chart of survey vs. population shares, one panel per trait
compare %>%
  ggplot(mapping = aes(x = reorder(group, freq), y = freq,
                       color = sample,
                       fill = sample,
                       label = round(freq, 2))) +
  # geom_col() always uses the identity stat; passing `stat` only triggers an
  # "unknown parameter" warning, so it is omitted
  geom_col(position = "dodge", color = "white") +
  geom_text(position = position_dodge(width = 1), vjust = -0.1) +
  facet_wrap(~reorder(type, myorder), scales = "free_x", ncol = 2) +
  theme_classic(base_size = 12) +
  theme(legend.position = "top", legend.box = "vertical",
        axis.text = element_text(angle = 0.180),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Response", y = "% Respondents", fill = "Type") +
  scale_fill_grey() +
  scale_color_grey(start = 0, end = 0.4) +
  ylim(0, 1.05) +
  # "none" replaces the deprecated `FALSE` for hiding a legend
  guides(color = "none")

# Save the figure built above. ggsave() must be its own statement: adding it
# to the plot with `+` is an error in ggplot2 >= 3.3.4.
ggsave("viz/fig_1.png", plot = last_plot(), dpi = 500, height = 8, width = 8)


# Now, generate those traits again...
```

## Distributions


First, let's generate descriptive statistics on our main outcome variables, the number of days of poor physical and mental health a person experiences per month.

```{r}
# Histogram and density of the two individual-level outcomes from the survey
g1 <- read_rds("dataset.rds") %>%
  select(id, days_poor_physical_health, days_poor_mental_health) %>%
  # Long format: one row per respondent-outcome pair
  pivot_longer(
    cols = -c(id),
    names_to = "measure",
    values_to = "value") %>%
  mutate(measure = measure %>% recode_factor(
    "days_poor_physical_health" = "Days of Poor Physical Health",
    "days_poor_mental_health" = "Days of Poor Mental Health")) %>%
  ggplot(mapping = aes(x = value, group = measure,
                       fill = measure, color = measure)) +
  # Histogram on the density scale so the density curve can be overlaid
  geom_histogram(aes(y =..density..), color = "white", bins = 15) +
  geom_density(col = "black", alpha = 0.5) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(color = "black", fill = NA),
        plot.subtitle = element_text(hjust = 0.5)) +
  facet_wrap(~measure) +
  labs(subtitle = "2019-2020 Individual-Level Qualtrics Survey",
       x = "Days of Poor Health Each Month", y = "(%) Respondents (n = 2752)") +
  # "none" replaces the deprecated `FALSE` for hiding a legend
  guides(fill = "none", color = "none") +
  scale_fill_manual(values = c("black", "darkgrey")) +
  scale_color_manual(values = c("darkgrey", "black"))
g1
```


```{r}
# Number of counties with both outcomes observed (first element of dim())
read_csv("county_dataset.csv") %>%
  filter(!is.na(days_poor_mental_health) & !is.na(days_poor_physical_health)) %>% dim()

# Histogram and density of the same two outcomes at the county level
# NOTE(review): the subtitle says 2016; confirm it matches the survey year of
# the county health measures that feed county_dataset.csv.
g2 <- read_csv("county_dataset.csv") %>%
  select(fips,
         `Days of Poor Mental Health` = days_poor_mental_health,
         `Days of Poor Physical Health` = days_poor_physical_health) %>%
  pivot_longer(cols = -fips, names_to = "measure", values_to = "value") %>%
  ggplot(mapping = aes(x = value, group = measure, fill = measure, color = measure)) +
  # Histogram on the density scale so the density curve can be overlaid
  geom_histogram(aes(y =..density..), color = "white", bins = 15) +
  geom_density(alpha = 0.5) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(color = "black", fill = NA),
        plot.subtitle = element_text(hjust = 0.5)) +
  # facet_wrap() and guides() were each added twice; once is enough
  facet_wrap(~measure) +
  labs(
    subtitle = "2016 County-level Behavioral Risk Factor Surveillance System Survey",
    x = "Days of Poor Health Each Month", y = "(%) Counties (n = 3142)") +
  # "none" replaces the deprecated `FALSE` for hiding a legend
  guides(fill = "none", color = "none") +
  scale_fill_manual(values = c("black", "darkgrey")) +
  scale_color_manual(values = c("darkgrey", "black"))
```

```{r}
library(ggpubr)
library(tidyverse)

# Stack the survey-level (g1) and county-level (g2) panels, display the result,
# then save it. ggsave() must be its own statement with an explicit `plot`:
# adding it to a plot with `+` is an error in ggplot2 >= 3.3.4.
distributions <- ggpubr::ggarrange(g1, g2, ncol = 1)
distributions
ggsave("viz/distributions.png", plot = distributions, dpi = 500, height = 8, width = 8)
 
```

```{r}
# Summary statistics for the two county-level outcome measures
# NOTE(review): these `_2016` column names differ from the
# days_poor_*_health columns used for the county histograms - confirm both
# sets exist in county_dataset.csv.
read_csv("county_dataset.csv") %>%
  select(fips, days_poor_ment_health_2016, days_poor_phys_health_2016) %>%
  # Long format: one row per county-measure pair
  pivot_longer(cols = -fips, names_to = "measure", values_to = "value") %>%
  group_by(measure) %>%
  summarise(
    median = median(value, na.rm = TRUE),
    mean = round(mean(value, na.rm = TRUE), 2),
    sd = round(sd(value, na.rm = TRUE), 2),
    min = min(value, na.rm = TRUE),
    max = max(value, na.rm = TRUE))
```


The survey and county-level figures are in the same ballpark, which is good: 2 vs. 3.9 is pretty close.
The distributions, however, look rather different. I believe the county distributions are naturally more bell-shaped because they are average scores per county rather than individual-level scores. Still, it's clear that we need a strategy for handling the zero-inflation in our individual-level data.


## Validation

Next, we're going to validate our measure of perceived polarization against other measures. How strongly does it correlate with the measures we would expect it to track?

```{r}
# Keep the polarization measures and the items used to validate them
dat <- read_rds("dataset.rds") %>%
  select(id, party_7, party_3, contains("diff"), affective_polarization_words,
         contains("would"), contains("ifmarried"))

# Simplest: distribution of the affective polarization measure
dat %>%
  ggplot(mapping = aes(x = affective_polarization_words)) +
  geom_density()

# Correlation between perceived polarization (vs. the average state voter)
# and affective polarization
dat %>%
  filter(!is.na(affective_polarization_words)) %>%
  # use = "complete.obs" also drops rows where the polarization score itself
  # is missing; without it a single NA there makes the whole correlation NA.
  # With no missing values the result is unchanged.
  summarize(cor = cor(diff_self_state_voter_avg, affective_polarization_words,
                      use = "complete.obs"))


# Cross-tabulate party identification with the two "ifmarried" items
dat %>%
  group_by(party_3, ifmarried_dem, ifmarried_rep) %>%
  count()
```



## Descriptive Statistics Table

```{r}

# Get table of numeric variables
# Result: one row per (measure, statistic) pair, with the statistic stored as
# text in `stat`, ready to be drawn as a table-style figure.
dat <- read_rds("dataset.rds")  %>%
  select(id, "days_poor_physical_health", "days_poor_mental_health",
         "diff_self_us_voter_avg", "diff_self_state_voter_avg",
         "social_trust_index", "diversity_friendship_network_index",
         "help_from_local_govt", "bonding", "bridging", "linking",
         "bmi", "party_7", "age", "income2") %>%
  pivot_longer(cols = -c(id), names_to = "measure", values_to = "value") %>%
  # Relabel the measures; the order of the recodes below becomes the factor
  # level order, which the plotting code uses to order the table rows.
  mutate(measure = measure %>% recode_factor(
    "days_poor_physical_health" = "Days of Poor Physical Health",
    "days_poor_mental_health" = "Days of Poor Mental Health",
    "diff_self_us_voter_avg" = "Polarization\n(compared to Median US Voter)",
    "diff_self_state_voter_avg" = "Polarization\n(compared to Median State Voter)",
    "social_trust_index" = "Social Trust",
    "diversity_friendship_network_index" = "Friendship Diversity",
    "help_from_local_govt" = "Help from Local Govt",
    # Area Level
    "bonding" = "Bonding",
    "bridging" = "Bridging",
    "linking" = "Linking",
    # Covariates
    "bmi" = "Body Mass Index",
    "party_7" = "Party Identification\n(7pt) (Dem-Rep)",
    "age" = "Age",
    "income2" = "Income")) %>%
  group_by(measure) %>%
  summarize(
    `Mean` = round(mean(value, na.rm = TRUE), 2),
    `Median` = round(median(value, na.rm = TRUE), 2),
    `Std. Dev.` = round(sd(value, na.rm = TRUE), 2),
    `Min` = round(min(value, na.rm = TRUE), 2),
    `Max` = round(max(value, na.rm = TRUE), 2),
    # Number of non-missing responses and share of missing responses
    `Obs.` = round(sum(!is.na(value)), 0),
    `% Missing` = round(sum(is.na(value)) / n() * 100, 1)  ) %>%
  ungroup() %>%
  # Convert every statistic to text so they can share a single column.
  # A bare function is passed here instead of the deprecated funs() wrapper.
  mutate_at(vars(`Mean`:`% Missing`), as.character) %>%
  pivot_longer(cols = -c(measure), names_to = "type", values_to = "stat") %>%
  # Fix the left-to-right order of the statistic columns in the figure
  mutate(type = factor(type, levels = c("Mean", "Median", "Std. Dev.",
                                        "Min", "Max", "Obs.", "% Missing")))

# Draw the long-format statistics as a grid of labelled tiles: one row per
# measure (in factor-level order, top to bottom) and one facet per statistic.
g1 <- dat %>%
  mutate(order = as.numeric(measure)) %>%
  ggplot(mapping = aes(x = "", y = reorder(measure, -order), label = stat)) +
  geom_tile(fill = "white", color = "darkgrey") +
  geom_text(color = "black") +
  scale_x_discrete(position = "top") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        axis.ticks.x = element_blank())  +
  facet_grid(~type, scales = "free") +
  labs(x = "Descriptive Statistics (Continuous Variables)",
       y = NULL)

# Save as a separate step with an explicit `plot`: ggsave() returns the file
# path, so chaining it onto the plot with `+` errors in current ggplot2.
ggsave("viz/table_A1.png", plot = g1, dpi = 500, width = 8.5, height = 6)


# Repeat for Categorical Variables
# Each categorical variable is expanded into 0/1 dummy columns (named
# "<variable>_<level>"), then counts and shares are computed per dummy.
dat <- read_rds("dataset.rds") %>%
  select(id, days_poor_physical_health, days_poor_mental_health,
         polarized_aggregate,
         race, female, somecollege,
         nevermarried, employment, uninsured, religion,
         urban, risk_from_smoking) %>%
  # Frequent distress = 14 or more days of poor health
  mutate(freq_physical_distress = if_else(days_poor_physical_health >= 14, 1, 0),
         freq_mental_distress = if_else(days_poor_mental_health >= 14, 1, 0)) %>%
  select(-days_poor_physical_health, -days_poor_mental_health) %>%
  # Dummy-code everything except the id column
  fastDummies::dummy_cols(select_columns = names(.)[-1],
                          ignore_na = TRUE, remove_selected_columns = TRUE)  %>%
  pivot_longer(cols = -c(id), names_to = "measure", values_to = "value") %>%
  # Keep only the dummy columns reported in the table.
  # Fix: "religion_Some other religion" previously lacked its underscore
  # ("religionSome other religion"), so it never matched a dummy column and
  # the "Other religion" row was silently dropped from the table.
  filter(measure %in% c("freq_physical_distress_1", "freq_mental_distress_1",
                        "polarized_aggregate_different", "female_Yes",
                        "race_aa", "race_white", "race_hispanic", "race_asian", "race_other",
                        "somecollege_Yes", "nevermarried_yes", "employment_not in labor force",
                        "employment_unemployed", "employment_employed",
                        "religion_Protestant", "religion_Catholic",
                        "religion_Another type of Christian", "religion_Jewish",
                        "religion_Muslim", "religion_No religion",
                        "religion_Some other religion",
                        "uninsured_Yes",
                        "risk_from_smoking_yes",
                        "urban_urbanized_area",
                        "urban_urban_cluster",
                        "urban_rural")) %>%
  # Relabel; the order of the recodes sets the row order of the table
  mutate(measure = measure %>% recode_factor(
    "freq_physical_distress_1" = "Frequent Physical Distress",
    "freq_mental_distress_1" = "Frequent Mental Distress",
    "polarized_aggregate_different" = "Polarization\n(Compared to County Outcome)",
    "female_Yes" = "Female",
    "race_white" = "White",
    "race_aa" = "Black",
    "race_hispanic" = "Hispanic",
    "race_asian" = "Asian",
    "race_other" = "Other race",
    "somecollege_Yes" = "At least some college",
    "nevermarried_yes" = "Never Married",
    "employment_not in labor force" = "Not in Labor Force",
    "employment_unemployed" = "Unemployed",
    "employment_employed" = "Employed",
    "religion_Protestant" = "Protestant",
    "religion_Catholic" = "Catholic",
    "religion_Another type of Christian" = "Other Christian",
    "religion_Jewish" = "Jewish",
    "religion_Muslim" = "Muslim",
    "religion_No religion" = "No religion",
    "religion_Some other religion" = "Other religion",
    "uninsured_Yes" = "Uninsured",
    "risk_from_smoking_yes" = "Risk from Smoking",
    "urban_urbanized_area" = "Heavily Urbanized Area",
    "urban_urban_cluster" = "Urban Cluster",
    "urban_rural" = "Rural")) %>%
  group_by(measure) %>%
  summarize(
    `Count` = round(sum(value == 1, na.rm = TRUE), 0),
    # Share of all respondents (missing responses stay in the denominator)
    `% Frequency` = round(sum(value == 1, na.rm = TRUE) / n() * 100, 1),
    `Obs.` = round(sum(!is.na(value)), 0),
    `% Missing` = round(sum(is.na(value)) / n() * 100, 1)  ) %>%
  ungroup() %>%
  # Convert every statistic to text so they can share a single column.
  # A bare function is passed here instead of the deprecated funs() wrapper.
  mutate_at(vars(`Count`:`% Missing`), as.character) %>%
  pivot_longer(cols = -c(measure), names_to = "type", values_to = "stat") %>%
  # Fix the left-to-right order of the statistic columns in the figure
  mutate(type = factor(type, levels = c("Count", "% Frequency",
                                        "Obs.", "% Missing")))


# Draw the categorical descriptives as a grid of labelled tiles: one row per
# category (in factor-level order, top to bottom) and one facet per statistic.
table_a2 <- dat %>%
  mutate(order = as.numeric(measure)) %>%
  ggplot(mapping = aes(x = "", y = reorder(measure, -order), label = stat)) +
  geom_tile(fill = "white", color = "darkgrey") +
  geom_text(color = "black") +
  scale_x_discrete(position = "top") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        axis.ticks.x = element_blank())  +
  facet_grid(~type, scales = "free") +
  labs(x = "Descriptive Statistics (Categorical Variables)",
       y = NULL)

# Save as a separate step with an explicit `plot`: ggsave() returns the file
# path, so chaining it onto the plot with `+` errors in current ggplot2.
ggsave("viz/table_A2.png", plot = table_a2, dpi = 500, width = 8, height = 7)
# Display the table in the notebook
table_a2

remove(dat, g1, g2, table_a2)
```

## County Descriptives Table

```{r}

# Get table of numeric variables
# Result: one row per (measure, statistic) pair, with the statistic stored as
# text in `stat`, ready to be drawn as a table-style figure.
dat <- read_csv("county_dataset.csv")  %>%
  # Calculate size of partisan gap (a polarization equivalent)
  mutate(partisan_gap_2016 = abs(democrat_2016 - republican_2016)) %>%
  select(fips,
         days_poor_physical_health, days_poor_mental_health,
         frequent_phys_distress, frequent_ment_distress,
         partisan_gap_2016,
         democrat_2016, #republican_2016,
         bonding,  bridging,  linking,
         obesity,
         median_age,
         pop_black,  pop_hisplat,  pop_asian,  pop_natam,  pop_pacific,
         median_household_income,  smoking, #urban,
         pop_female,
         pop_some_college,
         pop_never_married,
         pop_unemployed,
         protestant,  catholic,  jewish,
         muslim,  other_adherents,  uninsured) %>%
#  mutate(urban = factor(urban)) %>%
  pivot_longer(cols = -c(fips), names_to = "measure", values_to = "value") %>%
  # Relabel the measures; the order of the recodes below becomes the factor
  # level order, which the plotting code uses to order the table rows.
  mutate(measure = measure %>% recode_factor(
    "days_poor_physical_health" = "Days of Poor Physical Health",
    "days_poor_mental_health" = "Days of Poor Mental Health",
    "frequent_phys_distress" = "% with 14 Days+ of Poor Physical Health",
    "frequent_ment_distress" = "% with 14 Days+ of Poor Mental Health",
    "partisan_gap_2016" = "Polarization (Gap in Democrat\nvs. Republican Vote (%) in 2016)",
    "bonding" = "Bonding Social Capital",
    "bridging" = "Bridging Social Capital",
    "linking" = "Linking Social Capital",
    "obesity" = "% Obese",
    "democrat_2016" = "% Voted Democrat",
    "median_age" = "Median Age",
    "pop_female" = "% Women",
    "median_household_income" = "Median Household Income",
    "pop_black" = "% Black",
    "pop_hisplat" = "% Hispanic / Latino",
    "pop_asian" = "% Asian",
    "pop_natam" = "% Native American",
    "pop_pacific" = "% Hawaiian /Pacific Islander",
    "pop_some_college" = "% At least some college",
    "pop_never_married" = "% Never Married",
    "pop_unemployed" = "Unemployment Rate",
    "protestant" = "Protestant per 1000 residents",
    "catholic" = "Catholics per 1000 residents",
    "jewish" = "Jews per 1000 residents",
    "muslim" = "Muslims per 1000 residents",
    "other_adherents" = "Other religious adherents\nper 1000 residents",
    "uninsured" = "% Uninsured",
    "smoking" = "% At Risk from Smoking"
#  "urbanurbanized_area" = "Heavily Urbanized Area",
#  "urbanurban_cluster" = "Urban Cluster"
  )) %>%
  group_by(measure) %>%
  summarize(
    `Mean` = round(mean(value, na.rm = TRUE), 2),
    `Median` = round(median(value, na.rm = TRUE), 2),
    `Std. Dev.` = round(sd(value, na.rm = TRUE), 2),
    `Min` = round(min(value, na.rm = TRUE), 2),
    `Max` = round(max(value, na.rm = TRUE), 2),
    # Number of non-missing counties and share of counties with missing data
    `Obs.` = round(sum(!is.na(value)), 0),
    `% Missing` = round(sum(is.na(value)) / n() * 100, 1)  ) %>%
  ungroup() %>%
  # Convert every statistic to text so they can share a single column.
  # A bare function is passed here instead of the deprecated funs() wrapper.
  mutate_at(vars(`Mean`:`% Missing`), as.character) %>%
  pivot_longer(cols = -c(measure), names_to = "type", values_to = "stat") %>%
  # Fix the left-to-right order of the statistic columns in the figure
  mutate(type = factor(type, levels = c("Mean", "Median", "Std. Dev.",
                                        "Min", "Max", "Obs.", "% Missing")))


# Draw the county-level numeric descriptives as a grid of labelled tiles: one
# row per measure (in factor-level order) and one facet per statistic.
table_a3 <- dat %>%
  mutate(order = as.numeric(measure)) %>%
  ggplot(mapping = aes(x = "", y = reorder(measure, -order), label = stat)) +
  geom_tile(fill = "white", color = "darkgrey") +
  geom_text(color = "black") +
  scale_x_discrete(position = "top") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        axis.ticks.x = element_blank())  +
  facet_grid(~type, scales = "free") +
  labs(x = "Descriptive Statistics (Continuous Variables)",
       y = NULL)

# Save as a separate step with an explicit `plot`: ggsave() returns the file
# path, so chaining it onto the plot with `+` errors in current ggplot2.
ggsave("viz/table_A3.png", plot = table_a3, dpi = 500, width = 9, height = 8)
# Display the table in the notebook, then drop the temporary object
table_a3
remove(table_a3)

# Repeat for Categorical Variables
# `urban` is the only categorical county variable: expand it into 0/1 dummy
# columns (named "urban_<level>") and compute counts and shares per level.
dat <- read_csv("county_dataset.csv") %>%
  select(fips, urban) %>%
  # Dummy-code everything except the fips column
  fastDummies::dummy_cols(select_columns = names(.)[-1],
                          ignore_na = TRUE, remove_selected_columns = TRUE)  %>%
  pivot_longer(cols = -c(fips), names_to = "measure", values_to = "value") %>%
  filter(measure %in% c("urban_urbanized_area",
                        "urban_urban_cluster",
                        "urban_rural")) %>%
  # Relabel; the order of the recodes sets the row order of the table
  mutate(measure = measure %>% recode_factor(
    "urban_urbanized_area" = "Heavily Urbanized Area",
    "urban_urban_cluster" = "Urban Cluster",
    "urban_rural" = "Rural")) %>%
  group_by(measure) %>%
  summarize(
    `Count` = round(sum(value == 1, na.rm = TRUE), 0),
    # Share of all counties (missing values stay in the denominator)
    `% Frequency` = round(sum(value == 1, na.rm = TRUE) / n() * 100, 1),
    `Obs.` = round(sum(!is.na(value)), 0),
    `% Missing` = round(sum(is.na(value)) / n() * 100, 1)  ) %>%
  ungroup() %>%
  # Convert every statistic to text so they can share a single column.
  # A bare function is passed here instead of the deprecated funs() wrapper.
  mutate_at(vars(`Count`:`% Missing`), as.character) %>%
  pivot_longer(cols = -c(measure), names_to = "type", values_to = "stat") %>%
  # Fix the left-to-right order of the statistic columns in the figure
  mutate(type = factor(type, levels = c("Count", "% Frequency",
                                        "Obs.", "% Missing")))


# Draw the county-level categorical descriptives as a grid of labelled tiles:
# one row per category and one facet per statistic.
table_a4 <- dat %>%
  mutate(order = as.numeric(measure)) %>%
  ggplot(mapping = aes(x = "", y = reorder(measure, -order), label = stat)) +
  geom_tile(fill = "white", color = "darkgrey") +
  geom_text(color = "black") +
  scale_x_discrete(position = "top") +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        axis.ticks.x = element_blank())  +
  facet_grid(~type, scales = "free") +
  labs(x = "Descriptive Statistics (Categorical Variables)",
       y = NULL)

# Save as a separate step with an explicit `plot`: ggsave() returns the file
# path, so chaining it onto the plot with `+` errors in current ggplot2.
ggsave("viz/table_A4.png", plot = table_a4, dpi = 500, width = 8, height = 1.5)
# Display the table in the notebook
table_a4

# Clean up. g1 and g2 are not created in this chunk and may already have been
# removed by an earlier chunk, so only remove the objects that still exist
# (a plain remove(dat, g1, g2) warns about objects that are not found).
remove(list = intersect(c("dat", "g1", "g2", "table_a4"), ls()))
```



# 4. Multiple Imputation

```{r}
library(Amelia)

# First, use individual level survey
# Build the data frame handed to amelia(): identifiers, outcomes, ordinal,
# numeric and categorical variables, in that column order.
dat <- read_rds("dataset.rds") %>%
  select(id, fips, state, days_poor_physical_health, days_poor_mental_health,
         polarized_aggregate,
         race, female, somecollege,
         nevermarried, employment, uninsured, religion,
         urban, risk_from_smoking,
         "diff_self_us_voter_avg", "diff_self_state_voter_avg",
         "social_trust_index", "diversity_friendship_network_index",
         "help_from_local_govt", "bonding", "bridging", "linking",
         "bmi", "party_7", "age", "income2") %>%
  # Frequent distress = 14 or more days of poor health
  mutate(freq_physical_distress = if_else(days_poor_physical_health >= 14, 1, 0),
         freq_mental_distress = if_else(days_poor_mental_health >= 14, 1, 0))  %>%
  # Convert the categorical variables to factors. A bare function replaces
  # the deprecated funs() wrapper, and "urban" is now listed only once.
  mutate_at(c("race", "female", "somecollege", "polarized_aggregate",
              "nevermarried", "employment", "uninsured",
              "religion", "urban", "risk_from_smoking",
              "freq_physical_distress", "freq_mental_distress", "state"),
            as.factor) %>%
  # Make "same" the reference category for county-level polarization
  mutate(polarized_aggregate = polarized_aggregate %>% relevel(ref = "same")) %>%
  select(id,  "fips", "state",
         days_poor_physical_health, days_poor_mental_health,
         freq_physical_distress, freq_mental_distress,
         # ordinal variables
         party_7, income2,
         # numeric
         bmi, age,
         bonding, bridging, linking,
         help_from_local_govt, diversity_friendship_network_index, social_trust_index,
         diff_self_us_voter_avg, diff_self_state_voter_avg,
         # categorical
         race, female, somecollege,
         nevermarried, employment, uninsured, religion,
         urban, risk_from_smoking, polarized_aggregate) %>%
  as.data.frame()

# Create a set of logical bounds based on min and max
# Only the continuous variables are bounded: identifiers, ordinal variables
# and nominal variables are excluded.
unbounded_vars <- c("id", "fips", "state", "party_7", "income2",
                    "freq_physical_distress", "freq_mental_distress",
                    "race", "female", "somecollege",
                    "nevermarried", "employment", "uninsured", "religion",
                    "urban", "risk_from_smoking", "polarized_aggregate")
# Remaining columns, in the order they appear in dat
bounded_vars <- setdiff(names(dat), unbounded_vars)

# One row per bounded variable: its name, its column position in dat, and its
# observed minimum and maximum. Built directly from the column names, so it
# does not depend on row names surviving bind_cols()/t(), and the exclusion
# list is written once instead of twice (this also drops deprecated funs()).
mybounds <- data.frame(
  term = bounded_vars,
  column.number = match(bounded_vars, names(dat)),
  lower.bound = vapply(dat[bounded_vars], min, numeric(1), na.rm = TRUE),
  upper.bound = vapply(dat[bounded_vars], max, numeric(1), na.rm = TRUE),
  row.names = NULL,
  stringsAsFactors = FALSE
)
remove(unbounded_vars, bounded_vars)

# Fix the random seed so the imputations can be replicated
set.seed(15567)
# Impute five datasets from the survey data and write the result to disk.
saveRDS(
  amelia(
    dat,
    m = 5,
    # Identifiers: kept in the output but not used in the imputation model
    idvars = c("id", "fips", "state"),
    # Ordinal variables
    ords = c("party_7", "income2"),
    # Nominal variables
    noms = c(
      "freq_physical_distress", "freq_mental_distress",
      "race", "female", "somecollege",
      "nevermarried", "employment", "uninsured", "religion",
      "urban", "risk_from_smoking", "polarized_aggregate"),
    # Bounds matrix: column number, lower bound, upper bound
    bounds = as.matrix(select(mybounds, -term)),
    max.resample = 1000
  ),
  "dataset_mi.rds"
)

```


```{r}
## Repeat for county data
# Build the county-level data frame handed to amelia().
dat <- read_csv("county_dataset.csv") %>%
  # Transform measures to account for multicolinearity
  mutate(
    # Some places reported 0 Hispanic or Latino residents, so the share is
    # turned into a % (x 100) and 1 is added before logging, to avoid log(0).
    pop_hisplat = log(pop_hisplat * 100 + 1),
    pop_never_married = log(pop_never_married),
    # Bin the uninsured measure into six rank-based groups
    uninsured = ntile(uninsured, 6),
    # Size of the partisan gap (a polarization equivalent)
    partisan_gap_2016 = abs(democrat_2016 - republican_2016)
  ) %>%
  select(
    # Identifiers
    fips, state,
    # Health outcomes
    days_poor_physical_health, days_poor_mental_health,
    frequent_phys_distress, frequent_ment_distress,
    # Polarization and vote shares
    partisan_gap_2016, democrat_2016, republican_2016,
    # Social capital
    bonding, bridging, linking,
    # Covariates
    obesity,
    median_age,
    pop_black, pop_hisplat, pop_asian, pop_natam, pop_pacific,
    median_household_income, smoking, urban,
    pop_female,
    pop_some_college,
    pop_never_married,
    pop_unemployed,
    protestant, catholic, jewish, muslim, other_adherents, uninsured
  ) %>%
  # Categorical columns are stored as factors
  mutate(urban = factor(urban), state = factor(state)) %>%
  as.data.frame()


# Create a set of logical bounds based on min and max
# Every column is bounded except the identifiers (fips, state) and the
# nominal variable urban.
unbounded_vars <- c("fips", "state", "urban")
# Remaining columns, in the order they appear in dat
bounded_vars <- setdiff(names(dat), unbounded_vars)

# One row per bounded variable: its name, its column position in dat, and its
# observed minimum and maximum. Built directly from the column names, so it
# does not depend on row names surviving bind_cols()/t(), and the exclusion
# list is written once instead of twice (this also drops deprecated funs()).
mybounds <- data.frame(
  term = bounded_vars,
  column.number = match(bounded_vars, names(dat)),
  lower.bound = vapply(dat[bounded_vars], min, numeric(1), na.rm = TRUE),
  upper.bound = vapply(dat[bounded_vars], max, numeric(1), na.rm = TRUE),
  row.names = NULL,
  stringsAsFactors = FALSE
)
remove(unbounded_vars, bounded_vars)

# Impute five datasets from the county data and write the result to disk.
# NOTE(review): no seed is set in this chunk, so the result depends on the
# RNG state left by whatever ran before it - confirm whether that is intended.
saveRDS(
  amelia(
    dat,
    m = 5,
    # Identifiers: kept in the output but not used in the imputation model
    idvars = c("fips", "state"),
    # Nominal variables
    noms = c("urban"),
    # Bounds matrix: column number, lower bound, upper bound
    bounds = as.matrix(select(mybounds, -term)),
    max.resample = 1000
  ),
  "county_dataset_mi.rds"
)


```


# 5. Survey Modeling

## Fixed vs. Random

```{r}
library(Amelia)
library(Zelig)
library(plm)

dat <- read_rds("dataset_mi.rds")


# First, we need to justify fixed vs. random effects by state.
# The Hausman test is easiest here.

# Model specification shared by the fixed- and random-effects fits
hausman_formula <- log(days_poor_physical_health + 1) ~
  diff_self_us_voter_avg +
  # Social capital
  social_trust_index + diversity_friendship_network_index +
  help_from_local_govt +
  # Demographics
  bmi + party_7 + age + race + income2 + female + somecollege +
  nevermarried + employment + uninsured + religion +
  risk_from_smoking + urban

# We're going to run it on every imputation to confirm (previously only imp5
# was tested, although a p-value is reported below for each imputation).
# `state` is passed as the second element of `index` and the effect is set to
# "time", so the within/random effects being compared are state effects.
hausman_tests <- lapply(dat$imputations, function(imputed) {
  mw <- plm(formula = hausman_formula, data = imputed,
            model = "within", effect = "time", index = c("id", "state"))
  mr <- plm(formula = hausman_formula, data = imputed,
            model = "random", effect = "time", index = c("id", "state"))
  phtest(mw, mr)
})
# A statistically significant Hausman test, for each imputation,
# indicates that states serve better as fixed effects than random effects.
hausman_tests
# Results recorded from the original runs:
# p < 0.007
# p < 0.002
# p < 0.02
# p < 0.005
# p < 0.007
remove(hausman_formula, hausman_tests)

# Plus, they theoretically serve a better purpose as fixed effects,
# because it makes sense that each state would have a specific, fixed advantage/disadvantage
# towards health and political conditions, not a random one.
```

## Physical Health

### Negative Binomial

#### Simple Models

```{r}
# Load the multiply imputed survey data (the amelia() output saved in the
# Multiple Imputation section).
dat <- read_rds("dataset_mi.rds")

# Physical Health
# Models m1-m3 use polarization relative to the US voter average
# (diff_self_us_voter_avg). Every model is a negative binomial model of days
# of poor physical health with `state` entered as a covariate.
## US Level - basic model: polarization plus individual social capital
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")

## US Level - adds interactions between polarization and individual social capital
m2 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")

## US Level - adds interactions with area-level bonding, bridging and linking social capital
m3 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")




# Models m4-m6 repeat the three specifications using polarization relative to
# the state voter average (diff_self_state_voter_avg).
## State Level - basic model: polarization plus individual social capital
m4 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")

## State Level - adds interactions between polarization and individual social capital
m5 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")

## State Level - adds interactions with area-level bonding, bridging and linking social capital
m6 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")




# Models m7-m9 repeat the three specifications using the categorical
# county-level polarization measure (polarized_aggregate).
## County Level - basic model: polarization plus individual social capital
m7 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")

## County Level - adds interactions between polarization and individual social capital
m8 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")

## County Level - adds interactions with area-level bonding, bridging and linking social capital
m9 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")


# Summarise multicollinearity for a zelig model.
# For each fitted model stored in `myzeligmodel$zelig.out$z.out`, take the
# third column of car::vif() (presumably GVIF^(1/(2*Df)) - confirm against
# the car documentation), square it and average over terms; return the
# largest of these per-model averages.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_vif_by_fit <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_vif_by_fit))
}

# Summarise model fit for a zelig model.
# Computes performance::r2_nagelkerke() for each fitted model stored in
# `myzeligmodel$zelig.out$z.out` and returns the smallest value.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_by_fit <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_by_fit)))
}

# Report the number of observations for a zelig model.
# Reads `nobs` from broom::glance() for each fitted model stored in
# `myzeligmodel$zelig.out$z.out` and returns the smallest value.
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_by_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_by_fit)))
}

# Write the nine simple negative binomial models to an HTML regression table.
# Columns 1-3 use US-level, 4-6 state-level and 7-9 county-level polarization.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model", 
                             "Interactions with<br>Individual<br>Social Capital",
                         "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1), 
  single.row = TRUE,
  file = "viz/table_B1.html",
  bold = 0.10,
  caption.above = TRUE,
  # Fix: the italic tag is now closed with </i> (it previously ended with a
  # second opening <i>, leaving the tag unclosed in the HTML output).
  caption = "<b>Negative Binomial Models of Days of Poor Physical Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  # Maps coefficient names to row labels. The three polarization measures
  # share the same labels so that they line up in the same rows; coefficients
  # not listed here (e.g. the state terms) are left out of the table.
  custom.coef.map = list(
  "diff_self_us_voter_avg" = "Polarization",
  "diff_self_state_voter_avg" = "Polarization",
  "polarized_aggregatedifferent" = "Polarization",
  "social_trust_index" = "Social Trust",
  "diversity_friendship_network_index" = "Friendship Diversity",
  "help_from_local_govt" = "Help from Local Govt",
  # Individual Level
  "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",
  
  "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",
  
  # Area Level
  "bonding" = "Bonding",
  "bridging" = "Bridging",
  "linking" = "Linking",
  
  "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
  "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
  "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",
  
  "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
  "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
  "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",
  
  "diff_self_us_voter_avg:linking" = "Polarization x Linking",
  "diff_self_state_voter_avg:linking" = "Polarization x Linking",
  "polarized_aggregatedifferent:linking" = "Polarization x Linking",
  
  # Covariates
  # (not part of these simple models; these entries only take effect when the
  # same map is used for models that include the covariates)
  
  "bmi" = "Body Mass Index",
  "party_7" = "Party Identification (7pt) (Dem-Rep)",
  "age" = "Age",
  "femaleYes" = "Female",
  # Fix: the model term is `income2`, so the former key "income" could never
  # match a coefficient.
  "income2" = "Income", 
  "raceaa" = "Black", 
  "racehispanic" = "Hispanic", 
  "raceasian" = "Asian", 
  "raceother" = "Other race", 
  "somecollegeYes" = "At least some college", 
  "nevermarriedyes" = "Never Married", 
  "employmentnot in labor force" = "Not in Labor Force", 
  "employmentunemployed" = "Unemployed", 
  "religionProtestant" = "Protestant",
  "religionCatholic" = "Catholic", 
  "religionAnother type of Christian" = "Other Christian",
  "religionJewish" = "Jewish", 
  "religionMuslim" = "Muslim",
  "religionNo religion" = "No religion",  
  "religionSome other religion" = "Other religion",
  "uninsuredYes" = "Uninsured",
  "risk_from_smokingyes" = "At Risk from Smoking",
  "urbanurbanized_area" = "Heavily Urbanized Area",
  "urbanurban_cluster" = "Urban Cluster"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model (see get_gvif / get_r2)
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
   # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
  #    map(~get_obs(.)) %>% unlist()
  ),
  # Row groups, given as row positions in the table
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13)
  
)

```


#### With Controls

```{r}
# Load the multiply imputed survey data (the amelia() output saved in the
# Multiple Imputation section).
dat <- read_rds("dataset_mi.rds")

# Physical Health
# Same specifications as the simple models, now with demographic controls.
# Models m1-m3 use polarization relative to the US voter average
# (diff_self_us_voter_avg); `state` is entered as a covariate in every model.
## US Level - basic model with controls
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## US Level - adds interactions between polarization and individual social capital
m2 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## US Level - adds interactions with area-level bonding, bridging and linking social capital
m3 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")




# Models m4-m6 repeat the three specifications with controls, using
# polarization relative to the state voter average (diff_self_state_voter_avg).
## State Level - basic model with controls
m4 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## State Level - adds interactions between polarization and individual social capital
m5 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## State Level - adds interactions with area-level bonding, bridging and linking social capital
m6 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")




## County Level
# m7-m9: negative binomial models of days of poor physical health, using
# polarized_aggregate (reported as county-level polarization in the table
# header), with demographic controls and `state` included as a term.
# m7: basic model without interactions.
m7 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## County Level
# m8: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m8 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## County Level
# m9: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m9 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Multicollinearity summary for a fitted Zelig model.
# For every fit stored in zelig.out$z.out, the third column of car::vif() is
# squared and averaged; the largest of those averages is returned.
# NOTE(review): z.out presumably holds one fit per imputed dataset and the
# third column is presumably GVIF^(1/(2*Df)) -- confirm.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_sq_vif <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_sq_vif))
}

# Nagelkerke's R2 for a fitted Zelig model.
# Computes performance::r2_nagelkerke() for every fit stored in
# zelig.out$z.out and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_table <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_table)))
}

# Number of observations for a fitted Zelig model.
# Reads `nobs` from broom::glance() for every fit stored in zelig.out$z.out
# and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}

# Export Table B2 (viz/table_B2.html): the nine negative binomial models of
# days of poor physical health, three per level of polarization
# (US = m1-m3, state = m4-m6, county = m7-m9).
# Only coefficients named in custom.coef.map are shown. The three polarization
# measures are given the same label so that they are intended to share a row;
# the row numbers in `groups` rely on that.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                         "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B2.html",
  bold = 0.10,
  caption.above = TRUE,
  # The subtitle's italic element is now closed with </i> (it previously ended
  # with a second opening <i>, which is malformed HTML).
  caption = "<b>Negative Binomial Models of Days of Poor Physical Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  custom.coef.map = list(
  "diff_self_us_voter_avg" = "Polarization",
  "diff_self_state_voter_avg" = "Polarization",
  "polarized_aggregatedifferent" = "Polarization",
  "social_trust_index" = "Social Trust",
  "diversity_friendship_network_index" = "Friendship Diversity",
  "help_from_local_govt" = "Help from Local Govt",
  # Individual Level
  "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

  "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

  # Area Level
  "bonding" = "Bonding",
  "bridging" = "Bridging",
  "linking" = "Linking",

  "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
  "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
  "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

  "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
  "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
  "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

  "diff_self_us_voter_avg:linking" = "Polarization x Linking",
  "diff_self_state_voter_avg:linking" = "Polarization x Linking",
  "polarized_aggregatedifferent:linking" = "Polarization x Linking",

  # Covariates

  "bmi" = "Body Mass Index",
  "party_7" = "Party Identification (7pt) (Dem-Rep)",
  "age" = "Age",
  "femaleYes" = "Female",
  # NOTE(review): the models use `income2`, so the key "income" may never
  # match a coefficient -- confirm before relying on an Income row.
  "income" = "Income",
  "raceaa" = "Black",
  "racehispanic" = "Hispanic",
  "raceasian" = "Asian",
  "raceother" = "Other race",
  "somecollegeYes" = "At least some college",
  "nevermarriedyes" = "Never Married",
  "employmentnot in labor force" = "Not in Labor Force",
  "employmentunemployed" = "Unemployed",
  "religionProtestant" = "Protestant",
  "religionCatholic" = "Catholic",
  "religionAnother type of Christian" = "Other Christian",
  "religionJewish" = "Jewish",
  "religionMuslim" = "Muslim",
  "religionNo religion" = "No religion",
  "religionSome other religion" = "Other religion",
  "uninsuredYes" = "Uninsured",
  "risk_from_smokingyes" = "At Risk from Smoking",
  "urbanurbanized_area" = "Heavily Urbanized Area",
  "urbanurban_cluster" = "Urban Cluster"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, from the helpers above.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
   # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
  #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges refer to the rows of the rendered table, in coef-map order.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13,
                "<b>Basic Controls</b>" = 14:21,
                "<b>Extended Controls</b>" = 22:34)

)

```











### Logit

#### Simple Models

```{r}
# Physical Health
# m1-m3: logit models of freq_physical_distress using the US-level
# polarization measure (diff_self_us_voter_avg); no demographic controls,
# with `state` included as a term (state fixed effects).
## US Level
# m1: basic model without interactions.
m1 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

## US Level
# m2: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m2 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

## US Level
# m3: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m3 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")




## State Level
# m4-m6: logit models of freq_physical_distress using the state-level
# polarization measure (diff_self_state_voter_avg); no demographic controls,
# with `state` included as a term (state fixed effects).
# m4: basic model without interactions.
m4 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

## State Level
# m5: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m5 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

## State Level
# m6: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m6 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")




## County Level
# m7-m9: logit models of freq_physical_distress using polarized_aggregate
# (reported as county-level polarization in the table header); no demographic
# controls, with `state` included as a term (state fixed effects).
# m7: basic model without interactions.
m7 <- dat %>%
  zelig(formula = freq_physical_distress ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

## County Level
# m8: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m8 <- dat %>%
  zelig(formula = freq_physical_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

## County Level
# m9: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m9 <- dat %>%
  zelig(formula = freq_physical_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "logit")

# Multicollinearity summary for a fitted Zelig model.
# For every fit stored in zelig.out$z.out, the third column of car::vif() is
# squared and averaged; the largest of those averages is returned.
# NOTE(review): z.out presumably holds one fit per imputed dataset and the
# third column is presumably GVIF^(1/(2*Df)) -- confirm.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_sq_vif <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_sq_vif))
}

# Nagelkerke's R2 for a fitted Zelig model.
# Computes performance::r2_nagelkerke() for every fit stored in
# zelig.out$z.out and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_table <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_table)))
}

# Number of observations for a fitted Zelig model.
# Reads `nobs` from broom::glance() for every fit stored in zelig.out$z.out
# and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}
# Export Table B3 (viz/table_B3.html): the nine logit models of frequent
# physical distress without demographic controls, three per level of
# polarization (US = m1-m3, state = m4-m6, county = m7-m9).
# Only coefficients named in custom.coef.map are shown. The three polarization
# measures are given the same label so that they are intended to share a row;
# the row numbers in `groups` rely on that.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                         "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B3.html",
  bold = 0.10,
  caption.above = TRUE,
  # The subtitle's italic element is now closed with </i> (it previously ended
  # with a second opening <i>, which is malformed HTML).
  caption = "<b>Logit Models of Likelihood of 14 Days or More of Poor Physical Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  custom.coef.map = list(
  "diff_self_us_voter_avg" = "Polarization",
  "diff_self_state_voter_avg" = "Polarization",
  "polarized_aggregatedifferent" = "Polarization",
  "social_trust_index" = "Social Trust",
  "diversity_friendship_network_index" = "Friendship Diversity",
  "help_from_local_govt" = "Help from Local Govt",
  # Individual Level
  "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

  "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

  # Area Level
  "bonding" = "Bonding",
  "bridging" = "Bridging",
  "linking" = "Linking",

  "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
  "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
  "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

  "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
  "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
  "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

  "diff_self_us_voter_avg:linking" = "Polarization x Linking",
  "diff_self_state_voter_avg:linking" = "Polarization x Linking",
  "polarized_aggregatedifferent:linking" = "Polarization x Linking",

  # Covariates
  # The simple models in this chunk contain no demographic covariates, so the
  # entries below are not expected to match any coefficient here.

  "bmi" = "Body Mass Index",
  "party_7" = "Party Identification (7pt) (Dem-Rep)",
  "age" = "Age",
  "femaleYes" = "Female",
  "income" = "Income",
  "raceaa" = "Black",
  "racehispanic" = "Hispanic",
  "raceasian" = "Asian",
  "raceother" = "Other race",
  "somecollegeYes" = "At least some college",
  "nevermarriedyes" = "Never Married",
  "employmentnot in labor force" = "Not in Labor Force",
  "employmentunemployed" = "Unemployed",
  "religionProtestant" = "Protestant",
  "religionCatholic" = "Catholic",
  "religionAnother type of Christian" = "Other Christian",
  "religionJewish" = "Jewish",
  "religionMuslim" = "Muslim",
  "religionNo religion" = "No religion",
  "religionSome other religion" = "Other religion",
  "uninsuredYes" = "Uninsured",
  "risk_from_smokingyes" = "At Risk from Smoking",
  "urbanurbanized_area" = "Heavily Urbanized Area",
  "urbanurban_cluster" = "Urban Cluster"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, from the helpers above.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
   # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
  #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges refer to the rows of the rendered table, in coef-map order.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13)

)

```


#### With Controls

```{r}
# Physical Health
# m1-m3: logit models of freq_physical_distress using the US-level
# polarization measure (diff_self_us_voter_avg), with demographic controls
# and `state` included as a term (state fixed effects).
## US Level
# m1: basic model without interactions.
m1 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## US Level
# m2: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m2 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## US Level
# m3: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m3 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")




## State Level
# m4-m6: logit models of freq_physical_distress using the state-level
# polarization measure (diff_self_state_voter_avg), with demographic controls
# and `state` included as a term (state fixed effects).
# m4: basic model without interactions.
m4 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## State Level
# m5: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m5 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## State Level
# m6: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m6 <- dat %>%
  zelig(formula = freq_physical_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")




## County Level
# m7-m9: logit models of freq_physical_distress using polarized_aggregate
# (reported as county-level polarization in the table header), with
# demographic controls and `state` included as a term (state fixed effects).
# m7: basic model without interactions.
m7 <- dat %>%
  zelig(formula = freq_physical_distress ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## County Level
# m8: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m8 <- dat %>%
  zelig(formula = freq_physical_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## County Level
# m9: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m9 <- dat %>%
  zelig(formula = freq_physical_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

# Multicollinearity summary for a fitted Zelig model.
# For every fit stored in zelig.out$z.out, the third column of car::vif() is
# squared and averaged; the largest of those averages is returned.
# NOTE(review): z.out presumably holds one fit per imputed dataset and the
# third column is presumably GVIF^(1/(2*Df)) -- confirm.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_sq_vif <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_sq_vif))
}

# Nagelkerke's R2 for a fitted Zelig model.
# Computes performance::r2_nagelkerke() for every fit stored in
# zelig.out$z.out and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_table <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_table)))
}

# Number of observations for a fitted Zelig model.
# Reads `nobs` from broom::glance() for every fit stored in zelig.out$z.out
# and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}
# Export Table B4 (viz/table_B4.html): the nine logit models of frequent
# physical distress with demographic controls, three per level of
# polarization (US = m1-m3, state = m4-m6, county = m7-m9).
# Only coefficients named in custom.coef.map are shown. The three polarization
# measures are given the same label so that they are intended to share a row;
# the row numbers in `groups` rely on that.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                         "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B4.html",
  bold = 0.10,
  caption.above = TRUE,
  # The subtitle's italic element is now closed with </i> (it previously ended
  # with a second opening <i>, which is malformed HTML).
  caption = "<b>Logit Models of Likelihood of 14 Days or More of Poor Physical Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  custom.coef.map = list(
  "diff_self_us_voter_avg" = "Polarization",
  "diff_self_state_voter_avg" = "Polarization",
  "polarized_aggregatedifferent" = "Polarization",
  "social_trust_index" = "Social Trust",
  "diversity_friendship_network_index" = "Friendship Diversity",
  "help_from_local_govt" = "Help from Local Govt",
  # Individual Level
  "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

  "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

  # Area Level
  "bonding" = "Bonding",
  "bridging" = "Bridging",
  "linking" = "Linking",

  "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
  "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
  "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

  "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
  "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
  "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

  "diff_self_us_voter_avg:linking" = "Polarization x Linking",
  "diff_self_state_voter_avg:linking" = "Polarization x Linking",
  "polarized_aggregatedifferent:linking" = "Polarization x Linking",

  # Covariates

  "bmi" = "Body Mass Index",
  "party_7" = "Party Identification (7pt) (Dem-Rep)",
  "age" = "Age",
  "femaleYes" = "Female",
  # NOTE(review): the models use `income2`, so the key "income" may never
  # match a coefficient -- confirm before relying on an Income row.
  "income" = "Income",
  "raceaa" = "Black",
  "racehispanic" = "Hispanic",
  "raceasian" = "Asian",
  "raceother" = "Other race",
  "somecollegeYes" = "At least some college",
  "nevermarriedyes" = "Never Married",
  "employmentnot in labor force" = "Not in Labor Force",
  "employmentunemployed" = "Unemployed",
  "religionProtestant" = "Protestant",
  "religionCatholic" = "Catholic",
  "religionAnother type of Christian" = "Other Christian",
  "religionJewish" = "Jewish",
  "religionMuslim" = "Muslim",
  "religionNo religion" = "No religion",
  "religionSome other religion" = "Other religion",
  "uninsuredYes" = "Uninsured",
  "risk_from_smokingyes" = "At Risk from Smoking",
  "urbanurbanized_area" = "Heavily Urbanized Area",
  "urbanurban_cluster" = "Urban Cluster"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, from the helpers above.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
   # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
  #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges refer to the rows of the rendered table, in coef-map order.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13,
                "<b>Basic Controls</b>" = 14:21,
                "<b>Extended Controls</b>" = 22:34)

)

```



## Mental Health

### Negative Binomial

#### Simple Model

```{r}

# Load dataset_mi.rds into `dat`, replacing any existing `dat`.
# NOTE(review): presumably the multiply imputed dataset, given the file
# name -- confirm.
dat <- read_rds("dataset_mi.rds")


# Mental Health
# m1-m3: negative binomial models of days of poor mental health using the
# US-level polarization measure (diff_self_us_voter_avg); no demographic
# controls, with `state` included as a term (state fixed effects).
## US Level
# m1: basic model without interactions.
m1 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")

## US Level
# m2: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")

## US Level
# m3: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m3 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")




## State Level
# m4-m6: negative binomial models of days of poor mental health using the
# state-level polarization measure (diff_self_state_voter_avg); no demographic
# controls, with `state` included as a term (state fixed effects).
# m4: basic model without interactions.
m4 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")

## State Level
# m5: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m5 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt  + state,
        model = "negbin")

## State Level
# m6: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m6 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")




## County Level
# m7-m9: negative binomial models of days of poor mental health using
# polarized_aggregate (reported as county-level polarization in the table
# header); no demographic controls, with `state` included as a term.
# m7: basic model without interactions.
m7 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")

## County Level
# m8: polarization interacted with each individual social capital measure.
# In a formula `a * b` expands to a + b + a:b, so the main effects listed
# again under "Social capital" are redundant but harmless.
m8 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")

## County Level
# m9: adds interactions with bonding, bridging and linking; their main
# effects enter only through the `*` expansion.
m9 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "negbin")


# Multicollinearity summary for a fitted Zelig model.
# For every fit stored in zelig.out$z.out, the third column of car::vif() is
# squared and averaged; the largest of those averages is returned.
# NOTE(review): z.out presumably holds one fit per imputed dataset and the
# third column is presumably GVIF^(1/(2*Df)) -- confirm.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_sq_vif <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_sq_vif))
}

# Nagelkerke's R2 for a fitted Zelig model.
# Computes performance::r2_nagelkerke() for every fit stored in
# zelig.out$z.out and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_table <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_table)))
}

# Number of observations for a fitted Zelig model.
# Reads `nobs` from broom::glance() for every fit stored in zelig.out$z.out
# and returns the smallest value.
# NOTE(review): z.out presumably holds one fit per imputed dataset -- confirm.
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}

# Export Table B5 (viz/table_B5.html): the nine negative binomial models of
# days of poor mental health without demographic controls, three per level of
# polarization (US = m1-m3, state = m4-m6, county = m7-m9).
# Only coefficients named in custom.coef.map are shown. The three polarization
# measures are given the same label so that they are intended to share a row;
# the row numbers in `groups` rely on that.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                         "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B5.html",
  bold = 0.10,
  caption.above = TRUE,
  # The subtitle's italic element is now closed with </i> (it previously ended
  # with a second opening <i>, which is malformed HTML).
  caption = "<b>Negative Binomial Models of Days of Poor Mental Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  custom.coef.map = list(
  "diff_self_us_voter_avg" = "Polarization",
  "diff_self_state_voter_avg" = "Polarization",
  "polarized_aggregatedifferent" = "Polarization",
  "social_trust_index" = "Social Trust",
  "diversity_friendship_network_index" = "Friendship Diversity",
  "help_from_local_govt" = "Help from Local Govt",
  # Individual Level
  "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
  "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

  "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
  "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
  "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

  # Area Level
  "bonding" = "Bonding",
  "bridging" = "Bridging",
  "linking" = "Linking",

  "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
  "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
  "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

  "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
  "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
  "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

  "diff_self_us_voter_avg:linking" = "Polarization x Linking",
  "diff_self_state_voter_avg:linking" = "Polarization x Linking",
  "polarized_aggregatedifferent:linking" = "Polarization x Linking"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, from the helpers above.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
   # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
  #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges refer to the rows of the rendered table, in coef-map order.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13)

)

```

#### With Controls

```{r}

# Load the dataset used by every model in this chunk.
# NOTE(review): the file name and the table caption suggest this is a
# multiply-imputed dataset -- confirm.
dat <- read_rds("dataset_mi.rds")


# Mental Health: negative binomial models of days_poor_mental_health,
# with demographic controls and state terms.
## US Level, basic model (polarization term: diff_self_us_voter_avg)
m1 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## US Level, polarization interacted with individual social capital
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## US Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m3 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")




## State Level, basic model (polarization term: diff_self_state_voter_avg)
m4 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## State Level, polarization interacted with individual social capital
m5 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## State Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m6 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")




## County Level, basic model (polarization term: polarized_aggregate)
## NOTE(review): the table's coefficient map refers to this term as
## `polarized_aggregatedifferent`, so it is presumably a categorical
## variable with a "different" level -- confirm.
m7 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## County Level, polarization interacted with individual social capital
m8 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## County Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m9 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Summarise collinearity for a Zelig object as a single number.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      for each fit, the mean of the squared third column of
#                 car::vif(); the largest of those means is returned.
# The third column is GVIF^(1/(2*Df)) per the car documentation; car::vif()
# only returns a matrix when the model has a term with more than one df.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_vif_per_fit <- map(fits, function(fit) {
    scaled_gvif <- car::vif(fit)[, 3]
    mean(scaled_gvif^2)
  })
  max(unlist(mean_vif_per_fit))
}

# Lowest Nagelkerke R2 among the fitted models stored in a Zelig object.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      the minimum of performance::r2_nagelkerke() over those fits
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_rows <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_rows)))
}

# Smallest observation count among the fitted models stored in a Zelig object.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      the minimum of broom::glance(fit)$nobs across those fits
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}

# Write the nine negative binomial models with controls (m1-m9) to an HTML
# regression table (viz/table_B6.html). Columns 1-3, 4-6 and 7-9 are headed as
# the US-, state- and county-level polarization models respectively.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                             "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B6.html",
  bold = 0.10,
  caption.above = TRUE,
  # The italic subtitle is closed with </i> so the caption is well-formed HTML.
  caption = "<b>Negative Binomial Models of Days of Poor Mental Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  # Only coefficients named in this map are shown. The three polarization
  # measures are given the same label so they are reported under one row name.
  custom.coef.map = list(
    "diff_self_us_voter_avg" = "Polarization",
    "diff_self_state_voter_avg" = "Polarization",
    "polarized_aggregatedifferent" = "Polarization",
    "social_trust_index" = "Social Trust",
    "diversity_friendship_network_index" = "Friendship Diversity",
    "help_from_local_govt" = "Help from Local Govt",
    # Individual Level
    "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
    "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
    "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

    "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
    "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
    "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

    # Area Level
    "bonding" = "Bonding",
    "bridging" = "Bridging",
    "linking" = "Linking",

    "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
    "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
    "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

    "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
    "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
    "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

    "diff_self_us_voter_avg:linking" = "Polarization x Linking",
    "diff_self_state_voter_avg:linking" = "Polarization x Linking",
    "polarized_aggregatedifferent:linking" = "Polarization x Linking",

    # Covariates
    # NOTE(review): the models enter income as `income2`, so the "income" key
    # below may not match any coefficient -- confirm against the rendered table.

    "bmi" = "Body Mass Index",
    "party_7" = "Party Identification (7pt) (Dem-Rep)",
    "age" = "Age",
    "femaleYes" = "Female",
    "income" = "Income",
    "raceaa" = "Black",
    "racehispanic" = "Hispanic",
    "raceasian" = "Asian",
    "raceother" = "Other race",
    "somecollegeYes" = "At least some college",
    "nevermarriedyes" = "Never Married",
    "employmentnot in labor force" = "Not in Labor Force",
    "employmentunemployed" = "Unemployed",
    "religionProtestant" = "Protestant",
    "religionCatholic" = "Catholic",
    "religionAnother type of Christian" = "Other Christian",
    "religionJewish" = "Jewish",
    "religionMuslim" = "Muslim",
    "religionNo religion" = "No religion",
    "religionSome other religion" = "Other religion",
    "uninsuredYes" = "Uninsured",
    "risk_from_smokingyes" = "At Risk from Smoking",
    "urbanurbanized_area" = "Heavily Urbanized Area",
    "urbanurban_cluster" = "Urban Cluster"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, computed by the
  # get_gvif() and get_r2() helpers defined in this chunk.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
    # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
    #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges (by position in the rendered table) for the bold group headings.
  # NOTE(review): the map above lists 37 distinct row labels but the ranges
  # stop at row 34 -- confirm the ranges against the rendered table.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13,
                "<b>Basic Controls</b>" = 14:21,
                "<b>Extended Controls</b>" = 22:34)

)

```

### Logit

#### Simple Model

```{r}
# Mental Health: logit models of freq_mental_distress with social capital
# and state terms only (no demographic controls).
# Uses `dat` as loaded in an earlier chunk.
## US Level, basic model (polarization term: diff_self_us_voter_avg)
m1 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")

## US Level, polarization interacted with individual social capital
m2 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")

## US Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m3 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")




## State Level, basic model (polarization term: diff_self_state_voter_avg)
m4 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")

## State Level, polarization interacted with individual social capital
m5 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")

## State Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m6 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")




## County Level, basic model (polarization term: polarized_aggregate)
## NOTE(review): the table's coefficient map refers to this term as
## `polarized_aggregatedifferent`, so it is presumably a categorical
## variable with a "different" level -- confirm.
m7 <- dat %>%
  zelig(formula = freq_mental_distress ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt + state,
        model = "logit")

## County Level, polarization interacted with individual social capital
m8 <- dat %>%
  zelig(formula = freq_mental_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +state,
        model = "logit")

## County Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m9 <- dat %>%
  zelig(formula = freq_mental_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +state,
        model = "logit")

# Summarise collinearity for a Zelig object as a single number.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      for each fit, the mean of the squared third column of
#                 car::vif(); the largest of those means is returned.
# The third column is GVIF^(1/(2*Df)) per the car documentation; car::vif()
# only returns a matrix when the model has a term with more than one df.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_vif_per_fit <- map(fits, function(fit) {
    scaled_gvif <- car::vif(fit)[, 3]
    mean(scaled_gvif^2)
  })
  max(unlist(mean_vif_per_fit))
}

# Lowest Nagelkerke R2 among the fitted models stored in a Zelig object.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      the minimum of performance::r2_nagelkerke() over those fits
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_rows <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_rows)))
}

# Smallest observation count among the fitted models stored in a Zelig object.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      the minimum of broom::glance(fit)$nobs across those fits
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}
# Write the nine logit models without demographic controls (m1-m9) to an HTML
# regression table (viz/table_B7.html). Columns 1-3, 4-6 and 7-9 are headed as
# the US-, state- and county-level polarization models respectively.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                             "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B7.html",
  bold = 0.10,
  caption.above = TRUE,
  # The italic subtitle is closed with </i> so the caption is well-formed HTML.
  caption = "<b>Logit Models of Likelihood of 14 Days or More of Poor Mental Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  # Only coefficients named in this map are shown. The three polarization
  # measures are given the same label so they are reported under one row name.
  custom.coef.map = list(
    "diff_self_us_voter_avg" = "Polarization",
    "diff_self_state_voter_avg" = "Polarization",
    "polarized_aggregatedifferent" = "Polarization",
    "social_trust_index" = "Social Trust",
    "diversity_friendship_network_index" = "Friendship Diversity",
    "help_from_local_govt" = "Help from Local Govt",
    # Individual Level
    "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
    "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
    "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

    "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
    "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
    "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

    # Area Level
    "bonding" = "Bonding",
    "bridging" = "Bridging",
    "linking" = "Linking",

    "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
    "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
    "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

    "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
    "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
    "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

    "diff_self_us_voter_avg:linking" = "Polarization x Linking",
    "diff_self_state_voter_avg:linking" = "Polarization x Linking",
    "polarized_aggregatedifferent:linking" = "Polarization x Linking"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, computed by the
  # get_gvif() and get_r2() helpers defined in this chunk.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
    # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
    #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges (by position in the rendered table) for the bold group headings.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13)

)

```

#### With Controls

```{r}

# Mental Health: logit models of freq_mental_distress, with demographic
# controls and state terms. Uses `dat` as loaded in an earlier chunk.
## US Level, basic model (polarization term: diff_self_us_voter_avg)
m1 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_us_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## US Level, polarization interacted with individual social capital
m2 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## US Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m3 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")




## State Level, basic model (polarization term: diff_self_state_voter_avg)
m4 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_state_voter_avg +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## State Level, polarization interacted with individual social capital
m5 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## State Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m6 <- dat %>%
  zelig(formula = freq_mental_distress ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")




## County Level, basic model (polarization term: polarized_aggregate)
## NOTE(review): the table's coefficient map refers to this term as
## `polarized_aggregatedifferent`, so it is presumably a categorical
## variable with a "different" level -- confirm.
m7 <- dat %>%
  zelig(formula = freq_mental_distress ~
          polarized_aggregate +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## County Level, polarization interacted with individual social capital
m8 <- dat %>%
  zelig(formula = freq_mental_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          # Social capital
          # (main effects are already implied by the `*` terms above;
          #  repeating them in the formula is harmless)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

## County Level, adds interactions with bonding, bridging and linking
## (their main effects enter through the `*` expansion)
m9 <- dat %>%
  zelig(formula = freq_mental_distress ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "logit")

# Summarise collinearity for a Zelig object as a single number.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      for each fit, the mean of the squared third column of
#                 car::vif(); the largest of those means is returned.
# The third column is GVIF^(1/(2*Df)) per the car documentation; car::vif()
# only returns a matrix when the model has a term with more than one df.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_vif_per_fit <- map(fits, function(fit) {
    scaled_gvif <- car::vif(fit)[, 3]
    mean(scaled_gvif^2)
  })
  max(unlist(mean_vif_per_fit))
}

# Lowest Nagelkerke R2 among the fitted models stored in a Zelig object.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      the minimum of performance::r2_nagelkerke() over those fits
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_rows <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_rows)))
}

# Smallest observation count among the fitted models stored in a Zelig object.
#   myzeligmodel: object returned by zelig(); its fits are read from
#                 myzeligmodel$zelig.out$z.out (presumably one fit per
#                 imputed dataset -- confirm)
#   returns:      the minimum of broom::glance(fit)$nobs across those fits
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_per_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_per_fit)))
}
# Write the nine logit models with controls (m1-m9) to an HTML regression
# table (viz/table_B8.html). Columns 1-3, 4-6 and 7-9 are headed as the US-,
# state- and county-level polarization models respectively.
texreg::htmlreg(
  list(m1,m2,m3,m4,m5,m6,m7,m8,m9),
  custom.header = list("Effect of US-Level Polarization" = 1:3,
                       "Effect of State-Level Polarization" = 4:6,
                       "Effect of County-Level Polarization" = 7:9),
  custom.model.names = rep(c("Basic Model",
                             "Interactions with<br>Individual<br>Social Capital",
                             "Interactions with<br>County<br>Social Capital"), 3),
  stars = c(0.001, 0.01, 0.05, 0.1),
  single.row = TRUE,
  file = "viz/table_B8.html",
  bold = 0.10,
  caption.above = TRUE,
  # The italic subtitle is closed with </i> so the caption is well-formed HTML.
  caption = "<b>Logit Models of Likelihood of 14 Days or More of Poor Mental Health per Month (n = 2752)</b><br><i>With Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  # Only coefficients named in this map are shown. The three polarization
  # measures are given the same label so they are reported under one row name.
  custom.coef.map = list(
    "diff_self_us_voter_avg" = "Polarization",
    "diff_self_state_voter_avg" = "Polarization",
    "polarized_aggregatedifferent" = "Polarization",
    "social_trust_index" = "Social Trust",
    "diversity_friendship_network_index" = "Friendship Diversity",
    "help_from_local_govt" = "Help from Local Govt",
    # Individual Level
    "diff_self_us_voter_avg:social_trust_index" = "Polarization x Social Trust",
    "diff_self_state_voter_avg:social_trust_index" = "Polarization x Social Trust",
    "polarized_aggregatedifferent:social_trust_index" = "Polarization x Social Trust",

    "diff_self_us_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "diff_self_state_voter_avg:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "polarized_aggregatedifferent:diversity_friendship_network_index" = "Polarization x Friendship Diversity",
    "diff_self_us_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
    "diff_self_state_voter_avg:help_from_local_govt" = "Polarization x Help from Local Govt",
    "polarized_aggregatedifferent:help_from_local_govt" = "Polarization x Help from Local Govt",

    # Area Level
    "bonding" = "Bonding",
    "bridging" = "Bridging",
    "linking" = "Linking",

    "diff_self_us_voter_avg:bonding" = "Polarization x Bonding",
    "diff_self_state_voter_avg:bonding" = "Polarization x Bonding",
    "polarized_aggregatedifferent:bonding" = "Polarization x Bonding",

    "diff_self_us_voter_avg:bridging" = "Polarization x Bridging",
    "diff_self_state_voter_avg:bridging" = "Polarization x Bridging",
    "polarized_aggregatedifferent:bridging" = "Polarization x Bridging",

    "diff_self_us_voter_avg:linking" = "Polarization x Linking",
    "diff_self_state_voter_avg:linking" = "Polarization x Linking",
    "polarized_aggregatedifferent:linking" = "Polarization x Linking",

    # Covariates
    # NOTE(review): the models enter income as `income2`, so the "income" key
    # below may not match any coefficient -- confirm against the rendered table.

    "bmi" = "Body Mass Index",
    "party_7" = "Party Identification (7pt) (Dem-Rep)",
    "age" = "Age",
    "femaleYes" = "Female",
    "income" = "Income",
    "raceaa" = "Black",
    "racehispanic" = "Hispanic",
    "raceasian" = "Asian",
    "raceother" = "Other race",
    "somecollegeYes" = "At least some college",
    "nevermarriedyes" = "Never Married",
    "employmentnot in labor force" = "Not in Labor Force",
    "employmentunemployed" = "Unemployed",
    "religionProtestant" = "Protestant",
    "religionCatholic" = "Catholic",
    "religionAnother type of Christian" = "Other Christian",
    "religionJewish" = "Jewish",
    "religionMuslim" = "Muslim",
    "religionNo religion" = "No religion",
    "religionSome other religion" = "Other religion",
    "uninsuredYes" = "Uninsured",
    "risk_from_smokingyes" = "At Risk from Smoking",
    "urbanurbanized_area" = "Heavily Urbanized Area",
    "urbanurban_cluster" = "Urban Cluster"),
  include.nobs = FALSE,
  # Extra goodness-of-fit rows, one value per model, computed by the
  # get_gvif() and get_r2() helpers defined in this chunk.
  custom.gof.rows = list(
    "Mean VIF" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_gvif(.)) %>% unlist(),
    "Nagelkerke's R2" = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
      map(~get_r2(.)) %>% unlist()
    # "Num. Obs." = list(m1,m2,m3,m4,m5,m6,m7,m8,m9) %>%
    #    map(~get_obs(.)) %>% unlist()
  ),
  # Row ranges (by position in the rendered table) for the bold group headings.
  # NOTE(review): the map above lists 37 distinct row labels but the ranges
  # stop at row 34 -- confirm the ranges against the rendered table.
  groups = list("<b>Key Variable</b>" = 1,
                "<b>Individual Social Capital</b>" = 2:4,
                "<b>Individual Interactions</b>" = 5:7,
                "<b>County Social Capital</b>" = 8:10,
                "<b>County Interactions</b>" = 11:13,
                "<b>Basic Controls</b>" = 14:21,
                "<b>Extended Controls</b>" = 22:34)

)

```

## Simulation

### Bonding Social Capital

```{r}
# Install Zelig dependencies
#install.packages(c("AER", "coda", "geepack", 
#                   "maxLik", "MCMCpack", "VGAM"))
library(Zelig)
library(tidyverse)

# Reload the dataset so this chunk can run on its own.
dat <- read_rds("dataset_mi.rds")

# Both models below share the same right-hand side (US-level polarization
# interacted with individual and area-level social capital, plus controls and
# state terms); they differ only in the outcome.
## US Level, outcome: days_poor_physical_health
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


## US Level, outcome: days_poor_mental_health
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Get simulated quantities of interest at different confidence intervals
mysim <- m1 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10,length.out = 50), 
       social_trust_index = c(0, 1)) %>% 
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value,
         bonding = social_trust_index, diff_self_us_voter_avg)

start <- bind_rows(
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.05),
              upper = quantile(ev, 0.95),
              type = "90"),
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.025),
              upper = quantile(ev, 0.975),
              type = "95"),
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.005),
              upper = quantile(ev, 0.995),
              type = "99"),
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.0005),
              upper = quantile(ev, 0.9995),
              type = "999")) 

middle <- start %>%
  select(-median) %>%
  pivot_wider(id_cols = c(bonding, diff_self_us_voter_avg),
              names_from = type, values_from = c(lower, upper))

viza <- bind_rows(
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_999, upper = lower_99) %>%
    mutate(level = "99.9", type = "lower"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_99, upper = lower_95) %>%
    mutate(level = "99", type = "lower"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_95, upper = lower_90) %>%
    mutate(level = "95", type = "lower"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_90, upper = upper_90) %>%
    mutate(level = "90", type = "upper"),
   middle %>%
    select(bonding, diff_self_us_voter_avg, lower = upper_90, upper = upper_95) %>%
    mutate(level = "95", type = "upper"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = upper_95, upper = upper_99) %>%
    mutate(level = "99", type = "upper"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = upper_99, upper = upper_999) %>%
    mutate(level = "99.9", type = "upper")) %>%
  # Join back in the median
  left_join(by = c("bonding", "diff_self_us_voter_avg"),
            y = start %>%
              select(bonding, diff_self_us_voter_avg, median))




# Get simulated quantities of interest at different confidence intervals
mysim <- m2 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10,length.out = 50), 
       social_trust_index = c(0, 1)) %>% 
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value,
         bonding = social_trust_index, diff_self_us_voter_avg)

start <- bind_rows(
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.05),
              upper = quantile(ev, 0.95),
              type = "90"),
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.025),
              upper = quantile(ev, 0.975),
              type = "95"),
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.005),
              upper = quantile(ev, 0.995),
              type = "99"),
  mysim %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(median = median(ev),
              lower = quantile(ev, 0.0005),
              upper = quantile(ev, 0.9995),
              type = "999")) 

middle <- start %>%
  select(-median) %>%
  pivot_wider(id_cols = c(bonding, diff_self_us_voter_avg),
              names_from = type, values_from = c(lower, upper))

vizb <- bind_rows(
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_999, upper = lower_99) %>%
    mutate(level = "99.9", type = "lower"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_99, upper = lower_95) %>%
    mutate(level = "99", type = "lower"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_95, upper = lower_90) %>%
    mutate(level = "95", type = "lower"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = lower_90, upper = upper_90) %>%
    mutate(level = "90", type = "upper"),
   middle %>%
    select(bonding, diff_self_us_voter_avg, lower = upper_90, upper = upper_95) %>%
    mutate(level = "95", type = "upper"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = upper_95, upper = upper_99) %>%
    mutate(level = "99", type = "upper"),
  middle %>%
    select(bonding, diff_self_us_voter_avg, lower = upper_99, upper = upper_999) %>%
    mutate(level = "99.9", type = "upper")) %>%
  # Join back in the median
  left_join(by = c("bonding", "diff_self_us_voter_avg"),
            y = start %>%
              select(bonding, diff_self_us_voter_avg, median)) 


# Combine the two
viz <- bind_rows(viza,vizb, .id = "outcome") %>%
  mutate(outcome = outcome %>% recode_factor(
    "1" = "Physical\nHealth", "2" = "Mental\nHealth")) %>%
    mutate(bonding = factor(bonding) %>% 
           dplyr::recode_factor(
             "0" = "Weak Social Trust (0)",
             "1" = "Strong Social Trust (1)")) %>%
  mutate(model = factor(level) %>%
           dplyr::recode_factor(
             "99.9" = "99.9%",
             "99" = "99%",
             "95" = "95%",
             "90" = "90%")) %>%
  mutate(group = paste(level, type))


viz %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median, 
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(outcome~bonding, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Health",
       fill = "Confidence\nInterval") +
  ggsave("viz/fig_3.png", dpi = 500, width = 8, height = 8)



viz %>%
  filter(outcome == "Physical\nHealth") %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median, 
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(~bonding, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Physical Health",
       fill = "Confidence\nInterval") +
  ggsave("viz/fig_bonding_indiv_zelig_phys.png", dpi = 500, width = 8, height = 5)


rm(list = ls())
```

### Bridging Social Capital

```{r}
# Install Zelig dependencies
#install.packages(c("AER", "coda", "geepack", 
#                   "maxLik", "MCMCpack", "VGAM"))
library(Zelig)
library(tidyverse)

dat <- read_rds("dataset_mi.rds")

# Helper: simulate expected values from a fitted model over 50 values of
# diff_self_us_voter_avg (0 to 10) at
# diversity_friendship_network_index = 0 and 1.
# Returns a data frame with columns ev, bridging, diff_self_us_voter_avg,
# where `bridging` is the value the friendship index was set to.
simulate_ev <- function(model) {
  model %>%
    setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
         diversity_friendship_network_index = c(0, 1)) %>%
    sim() %>%
    Zelig::zelig_qi_to_df() %>%
    select(ev = expected_value,
           bridging = diversity_friendship_network_index,
           diff_self_us_voter_avg)
}

# Helper: turn simulated expected values into the nested interval bands
# (90, 95, 99, 99.9 percent) that are drawn as stacked ribbons.
# Returns one row per bridging value x polarization value x band, with the
# band edges (lower, upper), its labels (level, type), and the median.
get_bands <- function(mysim) {
  # Lower and upper quantiles bounding each interval
  bounds <- tribble(
    ~band, ~p_lower, ~p_upper,
    "90",  0.05,     0.95,
    "95",  0.025,    0.975,
    "99",  0.005,    0.995,
    "999", 0.0005,   0.9995)

  start <- bounds %>%
    pmap(function(band, p_lower, p_upper) {
      mysim %>%
        group_by(bridging, diff_self_us_voter_avg) %>%
        summarize(median = median(ev),
                  lower = quantile(ev, p_lower),
                  upper = quantile(ev, p_upper)) %>%
        # Drop the leftover grouping so later steps can freely
        # modify the bridging column
        ungroup() %>%
        mutate(type = band)
    }) %>%
    bind_rows()

  # One column per interval edge: lower_90 ... lower_999, upper_90 ... upper_999
  middle <- start %>%
    select(-median) %>%
    pivot_wider(id_cols = c(bridging, diff_self_us_voter_avg),
                names_from = type, values_from = c(lower, upper))

  # Each ribbon is the strip between two adjacent edges, ordered from the
  # bottom of the 99.9% interval up to its top
  edges <- tribble(
    ~lo,         ~hi,         ~lvl,   ~side,
    "lower_999", "lower_99",  "99.9", "lower",
    "lower_99",  "lower_95",  "99",   "lower",
    "lower_95",  "lower_90",  "95",   "lower",
    "lower_90",  "upper_90",  "90",   "upper",
    "upper_90",  "upper_95",  "95",   "upper",
    "upper_95",  "upper_99",  "99",   "upper",
    "upper_99",  "upper_999", "99.9", "upper")

  edges %>%
    pmap(function(lo, hi, lvl, side) {
      middle %>%
        transmute(bridging, diff_self_us_voter_avg,
                  lower = .data[[lo]], upper = .data[[hi]],
                  level = lvl, type = side)
    }) %>%
    bind_rows() %>%
    # Join back in the median. `start` repeats the same median once per
    # interval level, so keep one row per key; otherwise the join would
    # return every band row four times.
    left_join(by = c("bridging", "diff_self_us_voter_avg"),
              y = start %>%
                distinct(bridging, diff_self_us_voter_avg, median)) %>%
    mutate(bridging = factor(bridging) %>%
             dplyr::recode_factor(
               "0" = "Weak Friendship Diversity (0)",
               "1" = "Strong Friendship Diversity (1)")) %>%
    mutate(model = factor(level) %>%
             dplyr::recode_factor(
               "99.9" = "99.9%",
               "99" = "99%",
               "95" = "95%",
               "90" = "90%")) %>%
    # One ribbon per band: interval level plus side of the median
    mutate(group = paste(level, type))
}

# Helper: draw the stacked interval ribbons with the median as a dashed line
plot_bands <- function(data, facets, y_label) {
  data %>%
    ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median,
                         ymin = lower, ymax = upper,
                         group = group, fill = model)) +
    geom_ribbon(color = "white", size = 0.25) +
    geom_line(color = "white", linetype = "dashed") +
    scale_fill_grey(start = 0.8, end = 0) +
    facet_grid(facets, scales = "free_y") +
    theme_classic(base_size = 14) +
    theme(panel.spacing = unit(0.5, "cm"),
          panel.grid = element_blank(),
          strip.text.y = element_text(angle = 0),
          plot.caption = element_text(hjust = 0.5),
          panel.border = element_rect(color = "black", fill = NA)) +
    labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
         y = y_label,
         fill = "Confidence\nInterval")
}


## US Level: days of poor physical health
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

# Get simulated quantities of interest at different confidence intervals
viza <- m1 %>% simulate_ev() %>% get_bands()


## US Level: days of poor mental health
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

# Get simulated quantities of interest at different confidence intervals
vizb <- m2 %>% simulate_ev() %>% get_bands()


# Combine the two
viz <- bind_rows(viza, vizb, .id = "outcome") %>%
  mutate(outcome = outcome %>% recode_factor(
    "1" = "Physical\nHealth", "2" = "Mental\nHealth"))


# Both outcomes. The plot is stored and passed to ggsave() explicitly:
# adding ggsave() to a plot with `+` is rejected by current ggplot2.
fig_all <- viz %>%
  plot_bands(facets = outcome ~ bridging,
             y_label = "Expected Days of Poor Health")
fig_all
ggsave("viz/fig_4.png", plot = fig_all, dpi = 500, width = 8, height = 8)


# Physical health only
fig_phys <- viz %>%
  filter(outcome == "Physical\nHealth") %>%
  plot_bands(facets = ~bridging,
             y_label = "Expected Days of Poor Physical Health")
fig_phys
ggsave("viz/fig_bridging_indiv_zelig_phys.png", plot = fig_phys,
       dpi = 500, width = 8, height = 5)


# viz %>%
#  filter(outcome == "Physical\nHealth") %>%
#  filter(level == "99.9") %>%
#  filter(diff_self_us_voter_avg == min(diff_self_us_voter_avg) |
#           diff_self_us_voter_avg == max(diff_self_us_voter_avg)) %>%
#  filter(bridging == "Strong Friendship Diversity (1)") %>%
#  distinct() 

# Clear the workspace before the next chunk
rm(list = ls())
```



### Linking Social Capital

```{r}
# Install Zelig dependencies
#install.packages(c("AER", "coda", "geepack", 
#                   "maxLik", "MCMCpack", "VGAM"))
library(Zelig)
library(tidyverse)

dat <- read_rds("dataset_mi.rds")

## US Level: days of poor physical health
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


## US Level: days of poor mental health
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Helper: simulate expected values from a fitted model over 50 values of
# diff_self_us_voter_avg (0 to 10) at help_from_local_govt = -2 and 2.
# Returns a data frame with columns ev, linking, diff_self_us_voter_avg,
# where `linking` is the value help_from_local_govt was set to.
simulate_ev <- function(model) {
  model %>%
    setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
         help_from_local_govt = c(-2, 2)) %>%
    sim() %>%
    Zelig::zelig_qi_to_df() %>%
    select(ev = expected_value,
           linking = help_from_local_govt, diff_self_us_voter_avg)
}

# Helper: turn simulated expected values into the nested interval bands
# (90, 95, 99, 99.9 percent) that are drawn as stacked ribbons.
# Returns one row per linking value x polarization value x band, with the
# band edges (lower, upper), its labels (level, type), and the median.
get_bands <- function(mysim) {
  # Lower and upper quantiles bounding each interval
  bounds <- tribble(
    ~band, ~p_lower, ~p_upper,
    "90",  0.05,     0.95,
    "95",  0.025,    0.975,
    "99",  0.005,    0.995,
    "999", 0.0005,   0.9995)

  start <- bounds %>%
    pmap(function(band, p_lower, p_upper) {
      mysim %>%
        group_by(linking, diff_self_us_voter_avg) %>%
        summarize(median = median(ev),
                  lower = quantile(ev, p_lower),
                  upper = quantile(ev, p_upper)) %>%
        # Drop the leftover grouping so later steps can freely
        # modify the linking column
        ungroup() %>%
        mutate(type = band)
    }) %>%
    bind_rows()

  # One column per interval edge: lower_90 ... lower_999, upper_90 ... upper_999
  middle <- start %>%
    select(-median) %>%
    pivot_wider(id_cols = c(linking, diff_self_us_voter_avg),
                names_from = type, values_from = c(lower, upper))

  # Each ribbon is the strip between two adjacent edges, ordered from the
  # bottom of the 99.9% interval up to its top
  edges <- tribble(
    ~lo,         ~hi,         ~lvl,   ~side,
    "lower_999", "lower_99",  "99.9", "lower",
    "lower_99",  "lower_95",  "99",   "lower",
    "lower_95",  "lower_90",  "95",   "lower",
    "lower_90",  "upper_90",  "90",   "upper",
    "upper_90",  "upper_95",  "95",   "upper",
    "upper_95",  "upper_99",  "99",   "upper",
    "upper_99",  "upper_999", "99.9", "upper")

  edges %>%
    pmap(function(lo, hi, lvl, side) {
      middle %>%
        transmute(linking, diff_self_us_voter_avg,
                  lower = .data[[lo]], upper = .data[[hi]],
                  level = lvl, type = side)
    }) %>%
    bind_rows() %>%
    # Join back in the median. `start` repeats the same median once per
    # interval level, so keep one row per key; otherwise the join would
    # return every band row four times.
    left_join(by = c("linking", "diff_self_us_voter_avg"),
              y = start %>%
                distinct(linking, diff_self_us_voter_avg, median))
}

# Helper: draw the stacked interval ribbons with the median as a dashed line
plot_bands <- function(data, facets, y_label) {
  data %>%
    ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median,
                         ymin = lower, ymax = upper,
                         group = group, fill = model)) +
    geom_ribbon(color = "white", size = 0.25) +
    geom_line(color = "white", linetype = "dashed") +
    scale_fill_grey(start = 0.8, end = 0) +
    facet_grid(facets, scales = "free_y") +
    theme_classic(base_size = 14) +
    theme(panel.spacing = unit(0.5, "cm"),
          panel.grid = element_blank(),
          strip.text.y = element_text(angle = 0),
          plot.caption = element_text(hjust = 0.5),
          panel.border = element_rect(color = "black", fill = NA)) +
    labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
         y = y_label,
         fill = "Confidence\nInterval")
}


# Get simulated quantities of interest at different confidence intervals
viza <- m1 %>% simulate_ev() %>% get_bands()
vizb <- m2 %>% simulate_ev() %>% get_bands()


# Combine the two
viz <- bind_rows(viza, vizb, .id = "outcome") %>%
  mutate(outcome = outcome %>% recode_factor(
    "1" = "Physical\nHealth", "2" = "Mental\nHealth")) %>%
  mutate(linking = factor(linking) %>%
           dplyr::recode_factor(
             "-2" = "Weak Trust in Local Government (-2)",
             "2" = "Strong Trust in Local Government (2)")) %>%
  mutate(model = factor(level) %>%
           dplyr::recode_factor(
             "99.9" = "99.9%",
             "99" = "99%",
             "95" = "95%",
             "90" = "90%")) %>%
  # One ribbon per band: interval level plus side of the median
  mutate(group = paste(level, type))


# Both outcomes. The plot is stored and passed to ggsave() explicitly:
# adding ggsave() to a plot with `+` is rejected by current ggplot2.
fig_all <- viz %>%
  plot_bands(facets = outcome ~ linking,
             y_label = "Expected Days of Poor Health")
fig_all
ggsave("viz/fig_c2.png", plot = fig_all, dpi = 500, width = 9, height = 8)


# Physical health only
fig_phys <- viz %>%
  filter(outcome == "Physical\nHealth") %>%
  plot_bands(facets = ~linking,
             y_label = "Expected Days of Poor Physical Health")
fig_phys
ggsave("viz/fig_linking_indiv_zelig_phys.png", plot = fig_phys,
       dpi = 500, width = 9, height = 5)


# Clear the workspace before the next chunk
rm(list = ls())
```

## First Differences

### Simulation Procedure
```{r}
library(tidyverse)
library(Zelig)

dat <- read_rds("dataset_mi.rds")

## US Level
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")
# State
m2 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")
## County
m3 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")



## US Level
m4 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## State Level
# Negative binomial model (model = "negbin") of days of poor mental health,
# fit on `dat`. The polarization measure here is `diff_self_state_voter_avg`,
# interacted with each of six social capital measures. In an R formula
# `a * b` expands to a + b + a:b, so every main effect is already implied by
# the interaction terms.
m5 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital (main effects; duplicates of the `*` expansions
          # above, so listing them again does not change the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics (controls), plus `state`
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


## County
# Negative binomial model (model = "negbin") of days of poor mental health,
# fit on `dat`. The polarization measure here is `polarized_aggregate` (the
# same measure m3 uses for physical health), interacted with each of six
# social capital measures. In an R formula `a * b` expands to a + b + a:b, so
# every main effect is already implied by the interaction terms.
m6 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital (main effects; duplicates of the `*` expansions
          # above, so listing them again does not change the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics (controls), plus `state`
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Write a quick function to extract first differences
#
# Takes a simulation object that holds first differences at `sim.out$x1$fd`
# (i.e. sim() was run with both setx() and setx1() scenarios) and returns a
# single data frame with one column, `fd`, stacking every set of draws.
get_fd <- function(mysimulation) {
  mysimulation %>%
    with(sim.out$x1$fd) %>%
    # `sim.out$x1$fd` holds one element per set of draws (per the original
    # note: one per imputation, 5 imputations of 200 draws each -- not
    # verifiable from this chunk); bind them row-wise into one data frame
    map_dfr(~data.frame(fd = .))
}


# Get simulated quantities of interest at different confidence intervals
#
# Every sim() call contrasts a baseline scenario (setx) with a comparison
# scenario (setx1); get_fd() then extracts the simulated first differences.
# The position of each entry in the list becomes its `model` id ("1" to
# "18"), and the labels assigned at the bottom depend on that order:
# three blocks of six (by social capital measure), each ordered
# m1, m2, m3 (physical health) then m4, m5, m6 (mental health).
list(
  # Bonding Social Capital Interaction (social_trust_index: 0 vs 1)
  # Physical Health
  sim(m1,
      setx(m1, diff_self_us_voter_avg = 0, social_trust_index = 0),
      setx1(m1, diff_self_us_voter_avg = 10, social_trust_index = 1)),
  sim(m2,
      setx(m2, diff_self_state_voter_avg = 0, social_trust_index = 0),
      setx1(m2, diff_self_state_voter_avg = 10, social_trust_index = 1)),
  sim(m3,
      setx(m3, polarized_aggregate = "same", social_trust_index = 0),
      setx1(m3, polarized_aggregate = "different", social_trust_index = 1)),
  # Mental Health
  sim(m4,
      setx(m4, diff_self_us_voter_avg = 0, social_trust_index = 0),
      setx1(m4, diff_self_us_voter_avg = 10, social_trust_index = 1)),
  sim(m5,
      setx(m5, diff_self_state_voter_avg = 0, social_trust_index = 0),
      setx1(m5, diff_self_state_voter_avg = 10, social_trust_index = 1)),
  sim(m6,
      setx(m6, polarized_aggregate = "same", social_trust_index = 0),
      setx1(m6, polarized_aggregate = "different", social_trust_index = 1)),

  # Bridging Social Capital Interaction
  # (diversity_friendship_network_index: 0 vs 1)
  # Physical Health
  sim(m1,
      setx(m1, diff_self_us_voter_avg = 0,
           diversity_friendship_network_index = 0),
      setx1(m1, diff_self_us_voter_avg = 10,
            diversity_friendship_network_index = 1)),
  sim(m2,
      setx(m2, diff_self_state_voter_avg = 0,
           diversity_friendship_network_index = 0),
      setx1(m2, diff_self_state_voter_avg = 10,
            diversity_friendship_network_index = 1)),
  sim(m3,
      setx(m3, polarized_aggregate = "same",
           diversity_friendship_network_index = 0),
      setx1(m3, polarized_aggregate = "different",
            diversity_friendship_network_index = 1)),
  # Mental Health
  sim(m4,
      setx(m4, diff_self_us_voter_avg = 0,
           diversity_friendship_network_index = 0),
      setx1(m4, diff_self_us_voter_avg = 10,
            diversity_friendship_network_index = 1)),
  sim(m5,
      setx(m5, diff_self_state_voter_avg = 0,
           diversity_friendship_network_index = 0),
      setx1(m5, diff_self_state_voter_avg = 10,
            diversity_friendship_network_index = 1)),
  sim(m6,
      setx(m6, polarized_aggregate = "same",
           diversity_friendship_network_index = 0),
      setx1(m6, polarized_aggregate = "different",
            diversity_friendship_network_index = 1)),

  # Linking Social Capital Interaction (help_from_local_govt: -2 vs 2)
  # Physical Health
  sim(m1,
      setx(m1, diff_self_us_voter_avg = 0, help_from_local_govt = -2),
      setx1(m1, diff_self_us_voter_avg = 10, help_from_local_govt = 2)),
  sim(m2,
      setx(m2, diff_self_state_voter_avg = 0, help_from_local_govt = -2),
      setx1(m2, diff_self_state_voter_avg = 10, help_from_local_govt = 2)),
  sim(m3,
      setx(m3, polarized_aggregate = "same", help_from_local_govt = -2),
      setx1(m3, polarized_aggregate = "different", help_from_local_govt = 2)),
  # Mental Health
  sim(m4,
      setx(m4, diff_self_us_voter_avg = 0, help_from_local_govt = -2),
      setx1(m4, diff_self_us_voter_avg = 10, help_from_local_govt = 2)),
  sim(m5,
      setx(m5, diff_self_state_voter_avg = 0, help_from_local_govt = -2),
      setx1(m5, diff_self_state_voter_avg = 10, help_from_local_govt = 2)),
  sim(m6,
      setx(m6, polarized_aggregate = "same", help_from_local_govt = -2),
      setx1(m6, polarized_aggregate = "different", help_from_local_govt = 2))
) %>%
  # Stack the first differences; `model` is the list position as a string
  map_dfr(get_fd, .id = "model") %>%
  mutate(
    # Classify by social capital: consecutive blocks of six simulations
    social_capital = case_when(
      model %in% as.character(1:6) ~ "Bonding (Social Trust Index)",
      model %in% as.character(7:12) ~
        "Bridging (Friendship Network Diversity Index)",
      model %in% as.character(13:18) ~ "Linking (Trust in Local Government)"),
    # By polarization: the measure cycles US, state, county every three entries
    polarization = case_when(
      model %in% as.character(seq(1, 16, by = 3)) ~ "US-Level",
      model %in% as.character(seq(2, 17, by = 3)) ~ "State-Level",
      model %in% as.character(seq(3, 18, by = 3)) ~ "County-Level"),
    # By outcome: first three of each block of six are m1-m3, last three m4-m6
    outcome = case_when(
      model %in% as.character(c(1:3, 7:9, 13:15)) ~ "Physical Health",
      model %in% as.character(c(4:6, 10:12, 16:18)) ~ "Mental Health")) %>%
  saveRDS("fd.rds")
```

### Visualize

```{r}
# Load the simulated first differences; fix the outcome order for the legend
mysim <- read_rds("fd.rds") %>%
  mutate(outcome = factor(outcome, levels = c("Physical Health", "Mental Health")))

# Jittered points plus violins of the first differences, by polarization
# measure (rows after coord_flip) and social capital measure (facets)
fig_fd <- mysim %>%
  # Keep only the central 95% of first differences within each simulation
  group_by(model) %>%
  filter(fd > quantile(fd, 0.025),
         fd < quantile(fd, 0.975)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = polarization, y = fd, color = outcome)) +
  geom_jitter(alpha = 0.2, 
              position = position_jitterdodge(seed = 1, dodge.width = 1)) +
  # Reference line at "no change"
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  geom_violin(draw_quantiles = 0.5, fill = "white") +
  facet_wrap(~social_capital, ncol = 1) +
  coord_flip() +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.position = "bottom") +
  scale_color_grey(start = 0, end = 0.7) +
  labs(x = "Level Political Polarization",
       y = "Expected Change in Days of Poor Health per Month",
       color = "Outcome")

# Show the figure in the notebook
fig_fd

# Save explicitly. `plot + ggsave(...)` only worked while ggsave() returned
# NULL; it now returns the file path, and adding that to a plot is an error.
ggsave("viz/fig_2_early.png", plot = fig_fd,
       dpi = 500, width = 6, height = 7)
```

### Tabulate

```{r}
# Build the table of median first differences with 95% intervals:
# one row per simulation, with a ready-to-print `label`
mysim <- read_rds("fd.rds") %>%
  mutate(outcome = factor(outcome, levels = c("Physical Health", "Mental Health"))) %>%
  # Shorten the social capital labels to markdown/HTML for the axis text
  mutate(social_capital = social_capital %>% recode_factor(
    "Linking (Trust in Local Government)" = "<b>Linking</b><br>(Trust in Local<br>Government)",
    "Bridging (Friendship Network Diversity Index)" = "<b>Bridging</b><br>(Friendship Network<br>Diversity Index)",
    "Bonding (Social Trust Index)" = "<b>Bonding</b><br>(Social Trust<br>Index)")) %>%
  # "US-Level" -> "US", etc.
  mutate(polarization = polarization %>% str_remove("[-]Level")) %>%
  # Summarize each simulation by its median and 95% interval
  group_by(model, polarization, outcome, social_capital) %>%
  summarize(
    median = median(fd),
    lower = quantile(fd, 0.025),
    upper = quantile(fd, 0.975)) %>%
  ungroup() %>%
  # Star intervals that lie entirely above or entirely below zero. Anything
  # else (including a bound exactly at zero) gets an empty marker, so the
  # label never contains "NA".
  mutate(sig = case_when(
    lower > 0 | upper < 0 ~ "*",
    TRUE ~ "")) %>%
  # Round after the significance check so it uses unrounded bounds.
  # Passing `round` directly avoids the deprecated funs() helper.
  mutate_at(vars(median, lower, upper), round, digits = 2) %>%
  mutate(label = paste0(median, sig, "\n(",
                        lower, " to ",
                        upper, ")"))
library(ggtext)
# Draw the table as a grid of text tiles: polarization measure across,
# social capital measure down, one panel per outcome
tab_c1 <- mysim %>%
  ggplot(mapping = aes(x = polarization, y = social_capital,
                       label = label)) +
  geom_tile(color = "darkgrey", fill = "white") +
  geom_text() +
  facet_wrap(~outcome, ncol = 2) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.position = "bottom",
        # The y labels contain <b>/<br> markup, rendered by ggtext
        axis.text.y = element_markdown(size = 10),
        plot.subtitle = element_text(hjust = 0.5)) +
  labs(x = "Level Political Polarization",
       y = "Type of Social Capital",
       subtitle = "Expected Change in Days of Poor Health per Month")

# Show the table in the notebook
tab_c1

# Save explicitly. `plot + ggsave(...)` only worked while ggsave() returned
# NULL; it now returns the file path, and adding that to a plot is an error.
ggsave("viz/table_c1.png", plot = tab_c1,
       dpi = 500, width = 9, height = 3.5)
```



## County Simulations in Individual Models

### Bonding Social Capital

```{r}
# Install Zelig dependencies
#install.packages(c("AER", "coda", "geepack", 
#                   "maxLik", "MCMCpack", "VGAM"))
library(Zelig)
library(tidyverse)

# Load the modelling dataset (presumably the multiply imputed data, going by
# the "_mi" file name -- confirm against where dataset_mi.rds is written)
dat <- read_rds("dataset_mi.rds")

## US Level
# Negative binomial model (model = "negbin") of days of poor physical health.
# `diff_self_us_voter_avg` is interacted with each of six social capital
# measures. In an R formula `a * b` expands to a + b + a:b, so every main
# effect is already implied by the interaction terms.
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital (main effects; duplicates of the `*` expansions
          # above, so listing them again does not change the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics (controls), plus `state`
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


## US Level
# Same specification as m1, but the outcome is days of poor mental health.
# `a * b` expands to a + b + a:b, so every main effect is already implied by
# the interaction terms.
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital (main effects; duplicates of the `*` expansions
          # above, so listing them again does not change the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics (controls), plus `state`
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Get simulated quantities of interest at different confidence intervals

# Turn simulated expected values into stacked confidence bands.
#
# `sims` must contain the columns `ev`, `bonding` and
# `diff_self_us_voter_avg`. For every bonding x polarization cell this
# computes the median and the quantiles bounding the 90%, 95%, 99% and 99.9%
# intervals, then returns one row per cell and band with columns
# bonding, diff_self_us_voter_avg, median, lower, upper, level, type.
# A band is the slice between two neighbouring quantiles (e.g. the lower "99"
# band runs from the 0.5% to the 2.5% quantile), so the bands sit side by
# side instead of overlapping when drawn as ribbons.
#
# The median is computed once per cell in the same pass as the quantiles.
# Joining it back from a table holding one row per cell AND confidence level
# would repeat every band row four times.
summarize_bands <- function(sims) {
  bounds <- sims %>%
    group_by(bonding, diff_self_us_voter_avg) %>%
    summarize(
      lower_999 = quantile(ev, 0.0005),
      lower_99 = quantile(ev, 0.005),
      lower_95 = quantile(ev, 0.025),
      lower_90 = quantile(ev, 0.05),
      upper_90 = quantile(ev, 0.95),
      upper_95 = quantile(ev, 0.975),
      upper_99 = quantile(ev, 0.995),
      upper_999 = quantile(ev, 0.9995),
      median = median(ev)) %>%
    # Return an ungrouped table so later steps can freely recode `bonding`
    ungroup()

  bind_rows(
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = lower_999, upper = lower_99) %>%
      mutate(level = "99.9", type = "lower"),
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = lower_99, upper = lower_95) %>%
      mutate(level = "99", type = "lower"),
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = lower_95, upper = lower_90) %>%
      mutate(level = "95", type = "lower"),
    # The central 90% band spans both sides of the median; it is tagged
    # "upper" only so that its level/type pair is unique
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = lower_90, upper = upper_90) %>%
      mutate(level = "90", type = "upper"),
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = upper_90, upper = upper_95) %>%
      mutate(level = "95", type = "upper"),
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = upper_95, upper = upper_99) %>%
      mutate(level = "99", type = "upper"),
    bounds %>%
      dplyr::select(bonding, diff_self_us_voter_avg, median,
                    lower = upper_99, upper = upper_999) %>%
      mutate(level = "99.9", type = "upper"))
}

# Physical health (m1): simulate expected values over a 50-point grid of
# diff_self_us_voter_avg from 0 to 10, with bonding set to 0 and 1
mysim <- m1 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
       bonding = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  dplyr::select(ev = expected_value, bonding, diff_self_us_voter_avg)

viza <- summarize_bands(mysim)

# Mental health (m2): same scenarios
mysim <- m2 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
       bonding = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  dplyr::select(ev = expected_value, bonding, diff_self_us_voter_avg)

vizb <- summarize_bands(mysim)


# Combine the two: stack the physical (viza, id "1") and mental (vizb,
# id "2") health bands, then attach the labels used for the facets, the
# fill legend and the ribbon grouping
viz <- bind_rows(viza, vizb, .id = "outcome") %>%
  mutate(
    outcome = recode_factor(
      outcome,
      "1" = "Physical\nHealth", "2" = "Mental\nHealth"),
    bonding = dplyr::recode_factor(
      factor(bonding),
      "0" = "Weak County\nBonding Social Capital (0)",
      "1" = "Strong County\nBonding Social Capital (1)"),
    # Legend label per confidence level, ordered widest to narrowest
    model = dplyr::recode_factor(
      factor(level),
      "99.9" = "99.9%",
      "99" = "99%",
      "95" = "95%",
      "90" = "90%"),
    # One ribbon per level and side
    group = paste(level, type))


# Ribbon plot of expected days of poor health across perceived political
# difference: outcomes in rows, weak vs strong bonding in columns, one shaded
# ribbon per confidence band and a dashed line for the median
fig_bonding <- viz %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median, 
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(outcome~bonding, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Health",
       fill = "Confidence\nInterval")

# Show the figure in the notebook
fig_bonding

# Save explicitly. `plot + ggsave(...)` only worked while ggsave() returned
# NULL; it now returns the file path, and adding that to a plot is an error.
ggsave("viz/fig_bonding_indiv_county_zelig.png", plot = fig_bonding,
       dpi = 500, width = 8, height = 8)



# Same ribbon plot restricted to the physical health outcome, with weak vs
# strong bonding side by side
fig_bonding_phys <- viz %>%
  filter(outcome == "Physical\nHealth") %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median, 
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(~bonding, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Physical Health",
       fill = "Confidence\nInterval")

# Show the figure in the notebook
fig_bonding_phys

# Save explicitly. `plot + ggsave(...)` only worked while ggsave() returned
# NULL; it now returns the file path, and adding that to a plot is an error.
ggsave("viz/fig_bonding_indiv_county_zelig_phys.png", plot = fig_bonding_phys,
       dpi = 500, width = 8, height = 5)


# Clear the workspace before the next section, which reloads its own data
# and refits its own models.
# NOTE: this removes every object in the global environment, including the
# `select <- dplyr::select` alias defined at the top of the file; later bare
# select() calls then depend on the package search order.
rm(list = ls())
```

### Bridging Social Capital

```{r}
# Install Zelig dependencies
#install.packages(c("AER", "coda", "geepack", 
#                   "maxLik", "MCMCpack", "VGAM"))
library(Zelig)
library(tidyverse)

# Load the modelling dataset (presumably the multiply imputed data, going by
# the "_mi" file name -- confirm against where dataset_mi.rds is written)
dat <- read_rds("dataset_mi.rds")

## US Level
# Negative binomial model (model = "negbin") of days of poor physical health.
# `diff_self_us_voter_avg` is interacted with each of six social capital
# measures. In an R formula `a * b` expands to a + b + a:b, so every main
# effect is already implied by the interaction terms.
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital (main effects; duplicates of the `*` expansions
          # above, so listing them again does not change the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics (controls), plus `state`
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")



# Get simulated quantities of interest at different confidence intervals
# Physical health (m1): simulate expected values over a 50-point grid of
# diff_self_us_voter_avg from 0 to 10, with bridging set to 0 and 1
mysim <- m1 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
       bridging = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  dplyr::select(ev = expected_value, bridging, diff_self_us_voter_avg)

# Median and the quantiles bounding the 90%, 95%, 99% and 99.9% intervals,
# one row per bridging x polarization cell. Computing the median in the same
# pass keeps it at one value per cell; joining it back from a table with one
# row per cell AND confidence level would repeat every band row four times.
middle <- mysim %>%
  group_by(bridging, diff_self_us_voter_avg) %>%
  summarize(
    lower_999 = quantile(ev, 0.0005),
    lower_99 = quantile(ev, 0.005),
    lower_95 = quantile(ev, 0.025),
    lower_90 = quantile(ev, 0.05),
    upper_90 = quantile(ev, 0.95),
    upper_95 = quantile(ev, 0.975),
    upper_99 = quantile(ev, 0.995),
    upper_999 = quantile(ev, 0.9995),
    median = median(ev)) %>%
  # Ungroup so `bridging` can be recoded below
  ungroup()

# Stack the bands. Each band is the slice between two neighbouring quantiles,
# so the ribbons sit side by side instead of overlapping.
viza <- bind_rows(
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_999, upper = lower_99) %>%
    mutate(level = "99.9", type = "lower"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_99, upper = lower_95) %>%
    mutate(level = "99", type = "lower"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_95, upper = lower_90) %>%
    mutate(level = "95", type = "lower"),
  # The central 90% band spans both sides of the median; it is tagged
  # "upper" only so that its level/type pair is unique
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_90, upper = upper_90) %>%
    mutate(level = "90", type = "upper"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = upper_90, upper = upper_95) %>%
    mutate(level = "95", type = "upper"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = upper_95, upper = upper_99) %>%
    mutate(level = "99", type = "upper"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = upper_99, upper = upper_999) %>%
    mutate(level = "99.9", type = "upper")) %>%
  # Facet labels
  mutate(bridging = factor(bridging) %>% 
           dplyr::recode_factor(
             "0" = "Weak County\nBridging Social Capital (0)",
             "1" = "Strong County\nBridging Social Capital (1)")) %>%
  # Legend label per confidence level, ordered widest to narrowest
  mutate(model = factor(level) %>%
           dplyr::recode_factor(
             "99.9" = "99.9%",
             "99" = "99%",
             "95" = "95%",
             "90" = "90%")) %>%
  # One ribbon per level and side
  mutate(group = paste(level, type))





## US Level
# Same specification as m1, but the outcome is days of poor mental health.
# `a * b` expands to a + b + a:b, so every main effect is already implied by
# the interaction terms.
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital (main effects; duplicates of the `*` expansions
          # above, so listing them again does not change the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics (controls), plus `state`
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")



# Get simulated quantities of interest at different confidence intervals
# Mental health (m2): simulate expected values over a 50-point grid of
# diff_self_us_voter_avg from 0 to 10, with bridging set to 0 and 1
mysim <- m2 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
       bridging = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  dplyr::select(ev = expected_value, bridging, diff_self_us_voter_avg)

# Median and the quantiles bounding the 90%, 95%, 99% and 99.9% intervals,
# one row per bridging x polarization cell. Computing the median in the same
# pass keeps it at one value per cell; joining it back from a table with one
# row per cell AND confidence level would repeat every band row four times.
middle <- mysim %>%
  group_by(bridging, diff_self_us_voter_avg) %>%
  summarize(
    lower_999 = quantile(ev, 0.0005),
    lower_99 = quantile(ev, 0.005),
    lower_95 = quantile(ev, 0.025),
    lower_90 = quantile(ev, 0.05),
    upper_90 = quantile(ev, 0.95),
    upper_95 = quantile(ev, 0.975),
    upper_99 = quantile(ev, 0.995),
    upper_999 = quantile(ev, 0.9995),
    median = median(ev)) %>%
  # Ungroup so `bridging` can be recoded below
  ungroup()

# Stack the bands. Each band is the slice between two neighbouring quantiles,
# so the ribbons sit side by side instead of overlapping.
vizb <- bind_rows(
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_999, upper = lower_99) %>%
    mutate(level = "99.9", type = "lower"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_99, upper = lower_95) %>%
    mutate(level = "99", type = "lower"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_95, upper = lower_90) %>%
    mutate(level = "95", type = "lower"),
  # The central 90% band spans both sides of the median; it is tagged
  # "upper" only so that its level/type pair is unique
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = lower_90, upper = upper_90) %>%
    mutate(level = "90", type = "upper"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = upper_90, upper = upper_95) %>%
    mutate(level = "95", type = "upper"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = upper_95, upper = upper_99) %>%
    mutate(level = "99", type = "upper"),
  middle %>%
    dplyr::select(bridging, diff_self_us_voter_avg, median,
                  lower = upper_99, upper = upper_999) %>%
    mutate(level = "99.9", type = "upper")) %>%
  # Facet labels
  mutate(bridging = factor(bridging) %>% 
           dplyr::recode_factor(
             "0" = "Weak County\nBridging Social Capital (0)",
             "1" = "Strong County\nBridging Social Capital (1)")) %>%
  # Legend label per confidence level, ordered widest to narrowest
  mutate(model = factor(level) %>%
           dplyr::recode_factor(
             "99.9" = "99.9%",
             "99" = "99%",
             "95" = "95%",
             "90" = "90%")) %>%
  # One ribbon per level and side
  mutate(group = paste(level, type))


# Combine the two: viza (physical health) gets id "1" and vizb (mental
# health) gets id "2"; the ids are then swapped for facet labels
viz <- mutate(
  bind_rows(viza, vizb, .id = "outcome"),
  outcome = recode_factor(
    outcome,
    "1" = "Physical\nHealth",
    "2" = "Mental\nHealth"))


# Ribbon plot of expected days of poor health across perceived political
# difference: outcomes in rows, weak vs strong bridging in columns, one
# shaded ribbon per confidence band and a dashed line for the median
fig_bridging <- viz %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median, 
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(outcome~bridging, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Health",
       fill = "Confidence\nInterval")

# Show the figure in the notebook
fig_bridging

# Save explicitly. `plot + ggsave(...)` only worked while ggsave() returned
# NULL; it now returns the file path, and adding that to a plot is an error.
ggsave("viz/fig_bridging_indiv_county_zelig.png", plot = fig_bridging,
       dpi = 500, width = 8, height = 8)



# Same ribbon plot restricted to the physical health outcome, with weak vs
# strong bridging side by side
fig_bridging_phys <- viz %>%
  filter(outcome == "Physical\nHealth") %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median, 
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(~bridging, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Physical Health",
       fill = "Confidence\nInterval")

# Show the figure in the notebook
fig_bridging_phys

# Save explicitly. `plot + ggsave(...)` only worked while ggsave() returned
# NULL; it now returns the file path, and adding that to a plot is an error.
ggsave("viz/fig_bridging_indiv_county_zelig_phys.png", plot = fig_bridging_phys,
       dpi = 500, width = 8, height = 5)


# viz %>%
#  filter(outcome == "Physical\nHealth") %>%
#  filter(level == "99.9") %>%
#  filter(diff_self_us_voter_avg == min(diff_self_us_voter_avg) |
#           diff_self_us_voter_avg == max(diff_self_us_voter_avg)) %>%
#  filter(bridging == "Strong Friendship Diversity (1)") %>%
#  distinct() 

# Clear the workspace before the next section, which reloads its own data
# and refits its own models.
# NOTE: this removes every object in the global environment, including the
# `select <- dplyr::select` alias defined at the top of the file; later bare
# select() calls then depend on the package search order.
rm(list = ls())
```



### Linking Social Capital

```{r}
# Install Zelig dependencies
#install.packages(c("AER", "coda", "geepack", 
#                   "maxLik", "MCMCpack", "VGAM"))
library(Zelig)
library(tidyverse)

dat <- read_rds("dataset_mi.rds")

## US Level: days of poor physical health
# Zelig negative binomial model fit to `dat`. Perceived difference from the
# average US voter (diff_self_us_voter_avg) is interacted with each social
# capital measure; state enters as a covariate.
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital main effects (already implied by the `*` terms
          # above, so repeating them adds no new terms to the model)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


## US Level: days of poor mental health
# Same right-hand side as m1; only the outcome differs.
m2 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Simulate expected values from m1 (physical health) over a 50-point grid of
# perceived political difference (0 to 10), at linking = 0 and linking = 1.
mysim <- m1 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
       linking = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value,
         linking = linking, diff_self_us_voter_avg)

# One row per (linking, polarization) scenario: the median plus the bounds of
# the 90%, 95%, 99% and 99.9% simulation intervals. Computing everything in a
# single pass keeps the median on the same row as the bounds, so it never has
# to be joined back in. The previous join matched each band against four
# identical median rows and quadrupled every row of the result.
middle <- mysim %>%
  group_by(linking, diff_self_us_voter_avg) %>%
  summarize(median = median(ev),
            lower_90 = quantile(ev, 0.05, names = FALSE),
            upper_90 = quantile(ev, 0.95, names = FALSE),
            lower_95 = quantile(ev, 0.025, names = FALSE),
            upper_95 = quantile(ev, 0.975, names = FALSE),
            lower_99 = quantile(ev, 0.005, names = FALSE),
            upper_99 = quantile(ev, 0.995, names = FALSE),
            lower_999 = quantile(ev, 0.0005, names = FALSE),
            upper_999 = quantile(ev, 0.9995, names = FALSE)) %>%
  # summarize() leaves the data grouped by `linking`; drop that so later
  # steps are free to recode the column
  ungroup()

# Cut the intervals into seven non-overlapping bands stacked from the 99.9%
# lower bound up to the 99.9% upper bound. Each band becomes one ribbon;
# `level` drives the fill and (level, type) identifies the band.
viza <- bind_rows(
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_999, upper = lower_99) %>%
    mutate(level = "99.9", type = "lower"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_99, upper = lower_95) %>%
    mutate(level = "99", type = "lower"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_95, upper = lower_90) %>%
    mutate(level = "95", type = "lower"),
  # Central 90% band (labelled "upper", as before, so group ids are unchanged)
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_90, upper = upper_90) %>%
    mutate(level = "90", type = "upper"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = upper_90, upper = upper_95) %>%
    mutate(level = "95", type = "upper"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = upper_95, upper = upper_99) %>%
    mutate(level = "99", type = "upper"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = upper_99, upper = upper_999) %>%
    mutate(level = "99.9", type = "upper"))




# Simulate expected values from m2 (mental health) over a 50-point grid of
# perceived political difference (0 to 10), at linking = 0 and linking = 1.
mysim <- m2 %>%
  setx(diff_self_us_voter_avg = seq(from = 0, to = 10, length.out = 50),
       linking = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value,
         linking = linking, diff_self_us_voter_avg)

# One row per (linking, polarization) scenario: the median plus the bounds of
# the 90%, 95%, 99% and 99.9% simulation intervals. Computing everything in a
# single pass keeps the median on the same row as the bounds, so it never has
# to be joined back in. The previous join matched each band against four
# identical median rows and quadrupled every row of the result.
middle <- mysim %>%
  group_by(linking, diff_self_us_voter_avg) %>%
  summarize(median = median(ev),
            lower_90 = quantile(ev, 0.05, names = FALSE),
            upper_90 = quantile(ev, 0.95, names = FALSE),
            lower_95 = quantile(ev, 0.025, names = FALSE),
            upper_95 = quantile(ev, 0.975, names = FALSE),
            lower_99 = quantile(ev, 0.005, names = FALSE),
            upper_99 = quantile(ev, 0.995, names = FALSE),
            lower_999 = quantile(ev, 0.0005, names = FALSE),
            upper_999 = quantile(ev, 0.9995, names = FALSE)) %>%
  # summarize() leaves the data grouped by `linking`; drop that so later
  # steps are free to recode the column
  ungroup()

# Cut the intervals into seven non-overlapping bands stacked from the 99.9%
# lower bound up to the 99.9% upper bound. Each band becomes one ribbon;
# `level` drives the fill and (level, type) identifies the band.
vizb <- bind_rows(
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_999, upper = lower_99) %>%
    mutate(level = "99.9", type = "lower"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_99, upper = lower_95) %>%
    mutate(level = "99", type = "lower"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_95, upper = lower_90) %>%
    mutate(level = "95", type = "lower"),
  # Central 90% band (labelled "upper", as before, so group ids are unchanged)
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = lower_90, upper = upper_90) %>%
    mutate(level = "90", type = "upper"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = upper_90, upper = upper_95) %>%
    mutate(level = "95", type = "upper"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = upper_95, upper = upper_99) %>%
    mutate(level = "99", type = "upper"),
  middle %>%
    select(linking, diff_self_us_voter_avg, median,
           lower = upper_99, upper = upper_999) %>%
    mutate(level = "99.9", type = "upper"))


# Combine the two sets of bands. `.id` tags rows from viza as "1" and rows
# from vizb as "2", which are then relabelled as the two outcomes.
viz <- bind_rows(viza, vizb, .id = "outcome") %>%
  # The inputs can still be grouped by `linking` (summarize() only peels off
  # the last grouping variable), and mutate() refuses to modify a grouping
  # column. Drop any grouping before recoding it; a no-op if ungrouped.
  ungroup() %>%
  mutate(
    outcome = outcome %>% recode_factor(
      "1" = "Physical\nHealth", "2" = "Mental\nHealth"),
    # Facet labels for the two simulated values of linking
    linking = factor(linking) %>%
      dplyr::recode_factor(
        "0" = "Weak County\nLinking Social Capital (0)",
        "1" = "Strong County\nLinking Social Capital (1)"),
    # Legend labels; recode_factor() orders levels as listed (widest first)
    model = factor(level) %>%
      dplyr::recode_factor(
        "99.9" = "99.9%",
        "99" = "99%",
        "95" = "95%",
        "90" = "90%"),
    # One id per band so each is drawn as its own ribbon
    group = paste(level, type))


# Both outcomes (rows) by level of linking social capital (columns), with
# nested grey ribbons for the simulated confidence bands.
fig_linking_all <- viz %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median,
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  # One ribbon per band (`group`), shaded by confidence level (`model`)
  geom_ribbon(color = "white", size = 0.25) +
  # Median expected value
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(outcome~linking, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Health",
       fill = "Confidence\nInterval")

# Save with an explicit `plot =`. Chaining `+ ggsave()` onto a plot relied on
# ggsave() returning NULL and fails in current ggplot2 releases.
ggsave("viz/fig_linking_indiv_county_zelig.png", plot = fig_linking_all,
       dpi = 500, width = 9, height = 8)

# Print so the figure still appears in the notebook output
fig_linking_all



# Physical-health-only version of the linking figure: one panel per level of
# linking social capital.
fig_linking_phys <- viz %>%
  filter(outcome == "Physical\nHealth") %>%
  ggplot(mapping = aes(x = diff_self_us_voter_avg, y = median,
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  # One ribbon per band (`group`), shaded by confidence level (`model`)
  geom_ribbon(color = "white", size = 0.25) +
  # Median expected value
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(~linking, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "Perceived Political Differences\ncompared to Average US Voter (Least to Most)",
       y = "Expected Days of Poor Physical Health",
       fill = "Confidence\nInterval")

# Save with an explicit `plot =`. Chaining `+ ggsave()` onto a plot relied on
# ggsave() returning NULL and fails in current ggplot2 releases.
ggsave("viz/fig_linking_indiv_county_zelig_phys.png", plot = fig_linking_phys,
       dpi = 500, width = 9, height = 5)

# Print so the figure still appears in the notebook output
fig_linking_phys


rm(list = ls())
```

## First Differences

### Simulation Procedure

```{r}
library(tidyverse)
library(Zelig)

dat <- read_rds("dataset_mi.rds")

# Physical health models (m1-m3). All three are Zelig negative binomial models
# of days_poor_physical_health with the same covariates; they differ only in
# which polarization measure is interacted with the social capital terms.

## US Level: perceived difference from the average US voter
m1 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")
## State Level: perceived difference from the average voter in the state
m2 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")
## County Level: polarized_aggregate (set to "same" / "different" in the
## first-difference scenarios further down)
m3 <- dat %>%
  zelig(formula = days_poor_physical_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")



# Mental health models (m4-m6): same three specifications as m1-m3, with
# days_poor_mental_health as the outcome.

## US Level: perceived difference from the average US voter
m4 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_us_voter_avg * social_trust_index +
          diff_self_us_voter_avg * diversity_friendship_network_index +
          diff_self_us_voter_avg * help_from_local_govt +
          diff_self_us_voter_avg * bonding +
          diff_self_us_voter_avg * bridging +
          diff_self_us_voter_avg * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")

## State Level: perceived difference from the average voter in the state
m5 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          diff_self_state_voter_avg * social_trust_index +
          diff_self_state_voter_avg * diversity_friendship_network_index +
          diff_self_state_voter_avg * help_from_local_govt +
          diff_self_state_voter_avg * bonding +
          diff_self_state_voter_avg * bridging +
          diff_self_state_voter_avg * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


## County Level: polarized_aggregate
m6 <- dat %>%
  zelig(formula = days_poor_mental_health ~
          polarized_aggregate * social_trust_index +
          polarized_aggregate * diversity_friendship_network_index +
          polarized_aggregate * help_from_local_govt +
          polarized_aggregate * bonding +
          polarized_aggregate * bridging +
          polarized_aggregate * linking +
          # Social capital main effects (already implied by the `*` terms)
          social_trust_index + diversity_friendship_network_index + 
          help_from_local_govt +
          # Demographics
          bmi + party_7 + age + race + income2 + female + somecollege +
          nevermarried + employment + uninsured + religion +
          risk_from_smoking + urban + state,
        model = "negbin")


# Pull the simulated first differences out of a Zelig simulation object and
# stack them into a single data frame with one column, `fd`.
#   mysimulation: object returned by sim() after setx() and setx1()
get_fd <- function(mysimulation) {
  # List of first-difference draws; per the original author's note there is
  # one element per imputed dataset
  fd_draws <- with(mysimulation, sim.out$x1$fd)
  # Row-bind the elements into one long data frame
  map_dfr(fd_draws, function(draws) data.frame(fd = draws))
}


# Simulate first differences for 18 scenarios and save them to fd.rds.
# Each list element is one sim() call: setx() fixes the baseline (polarization
# and one social capital measure both at their low value) and setx1() the
# contrast (both at their high value).
# IMPORTANT: the list is unnamed, so map_dfr(.id = "model") labels each
# element by its position ("1".."18"). The case_when() calls at the bottom
# classify scenarios by that position, so the order of the elements below
# must not change without updating them.
list(
  # Bonding Social Capital Interaction (social_trust_index 0 -> 1): models 1-6
  # Physical Health
  m1 %>%
    sim(., 
        setx(., diff_self_us_voter_avg = 0,
             social_trust_index = 0),
        setx1(., diff_self_us_voter_avg = 10,
              social_trust_index = 1)),
  m2 %>%
    sim(., 
        setx(., diff_self_state_voter_avg = 0,
             social_trust_index = 0),
        setx1(., diff_self_state_voter_avg = 10,
              social_trust_index = 1)),
  m3 %>%
    sim(., 
        setx(., polarized_aggregate = "same",
             social_trust_index = 0),
        setx1(., polarized_aggregate = "different",
              social_trust_index = 1)),
  # Mental Health
  m4 %>%
    sim(., 
        setx(., diff_self_us_voter_avg = 0,
             social_trust_index = 0),
        setx1(., diff_self_us_voter_avg = 10,
              social_trust_index = 1)),
  m5 %>%
    sim(., 
        setx(., diff_self_state_voter_avg = 0,
             social_trust_index = 0),
        setx1(., diff_self_state_voter_avg = 10,
              social_trust_index = 1)),
  m6 %>%
    sim(., 
        setx(., polarized_aggregate = "same",
             social_trust_index = 0),
        setx1(., polarized_aggregate = "different",
              social_trust_index = 1)),

  # Bridging Social Capital Interaction
  # (diversity_friendship_network_index 0 -> 1): models 7-12
  # Physical Health
  m1 %>%
    sim(., 
        setx(., diff_self_us_voter_avg = 0,
             diversity_friendship_network_index = 0),
        setx1(., diff_self_us_voter_avg = 10,
              diversity_friendship_network_index = 1)),
  m2 %>%
    sim(., 
        setx(., diff_self_state_voter_avg = 0,
             diversity_friendship_network_index = 0),
        setx1(., diff_self_state_voter_avg = 10,
              diversity_friendship_network_index = 1)),
  m3 %>%
    sim(., 
        setx(., polarized_aggregate = "same",
             diversity_friendship_network_index = 0),
        setx1(., polarized_aggregate = "different",
              diversity_friendship_network_index = 1)),
  # Mental Health
  m4 %>%
    sim(., 
        setx(., diff_self_us_voter_avg = 0,
             diversity_friendship_network_index = 0),
        setx1(., diff_self_us_voter_avg = 10,
              diversity_friendship_network_index = 1)),
  m5 %>%
    sim(., 
        setx(., diff_self_state_voter_avg = 0,
             diversity_friendship_network_index = 0),
        setx1(., diff_self_state_voter_avg = 10,
              diversity_friendship_network_index = 1)),
  m6 %>%
    sim(., 
        setx(., polarized_aggregate = "same",
             diversity_friendship_network_index = 0),
        setx1(., polarized_aggregate = "different",
              diversity_friendship_network_index = 1)),
  # Linking Social Capital Interaction (help_from_local_govt -2 -> 2):
  # models 13-18
  # Physical Health
  m1 %>%
    sim(., 
        setx(., diff_self_us_voter_avg = 0,
             help_from_local_govt = -2),
        setx1(., diff_self_us_voter_avg = 10,
              help_from_local_govt = 2)),
  m2 %>%
    sim(., 
        setx(., diff_self_state_voter_avg = 0,
             help_from_local_govt = -2),
        setx1(., diff_self_state_voter_avg = 10,
              help_from_local_govt = 2)),
  m3 %>%
    sim(., 
        setx(., polarized_aggregate = "same",
             help_from_local_govt = -2),
        setx1(., polarized_aggregate = "different",
              help_from_local_govt = 2)),
  # Mental Health
   m4 %>%
    sim(., 
        setx(., diff_self_us_voter_avg = 0,
             help_from_local_govt = -2),
        setx1(., diff_self_us_voter_avg = 10,
              help_from_local_govt = 2)),
  m5 %>%
    sim(., 
        setx(., diff_self_state_voter_avg = 0,
             help_from_local_govt = -2),
        setx1(., diff_self_state_voter_avg = 10,
              help_from_local_govt = 2)),
  m6 %>%
    sim(., 
        setx(., polarized_aggregate = "same",
             help_from_local_govt = -2),
        setx1(., polarized_aggregate = "different",
              help_from_local_govt = 2))
) %>%
  # Stack the first differences of all scenarios; `model` holds the list
  # position of the scenario as a character string
  map_dfr(~get_fd(.), .id = "model") %>%
  mutate(
    # Classify by social capital (blocks of six consecutive scenarios)
    social_capital = case_when(
    model %in% c(1:6) ~ "Bonding (Social Trust Index)",
    model %in% c(7:12) ~ "Bridging (Friendship Network Diversity Index)",
    model %in% c(13:18) ~ "Linking (Trust in Local Government)"),
    # By polarization (within each block: US, state, county, repeated twice)
    polarization = case_when(
      model %in% c(1,4,7,10,13,16) ~ "US-Level",
      model %in% c(2,5,8,11,14,17) ~ "State-Level",
      model %in% c(3,6,9,12,15,18) ~ "County-Level"),
    # By outcome (first three of each block are m1-m3, last three are m4-m6)
    outcome = case_when(
      model %in% c(1,2,3,
                   7,8,9,
                   13,14,15) ~ "Physical Health",
      model %in% c(4,5,6,
                   10,11,12,
                   16,17,18) ~ "Mental Health")) %>%
  saveRDS("fd.rds")
```

### Visualize

```{r}
# Load the simulated first differences; fix the outcome order so Physical
# Health comes first in the legend and in the dodged violins.
mysim <- read_rds("fd.rds") %>%
  mutate(outcome = factor(outcome, levels = c("Physical Health", "Mental Health")))

# Early draft of Figure 2: distribution of first differences by polarization
# measure and outcome, one panel per type of social capital.
fig_2_early <- mysim %>%
  # Keep the central 95% of simulated first differences within each scenario
  group_by(model) %>%
  filter(fd > quantile(fd, 0.025),
         fd < quantile(fd, 0.975)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = polarization, y = fd, color = outcome)) +
  geom_jitter(alpha = 0.2,
              position = position_jitterdodge(seed = 1, dodge.width = 1)) +
  # Reference line at no change
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  # Violins with a line at the median
  geom_violin(draw_quantiles = 0.5, fill = "white") +
  facet_wrap(~social_capital, ncol = 1) +
  coord_flip() +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.position = "bottom") +
  scale_color_grey(start = 0, end = 0.7) +
  labs(x = "Level Political Polarization",
       y = "Expected Change in Days of Poor Health per Month",
       color = "Outcome")

# Save with an explicit `plot =`. Chaining `+ ggsave()` onto a plot relied on
# ggsave() returning NULL and fails in current ggplot2 releases.
ggsave("viz/fig_2_early.png", plot = fig_2_early,
       dpi = 500, width = 6, height = 7)

# Print so the figure still appears in the notebook output
fig_2_early





# Final Figure 2: same layout as the early draft, with publication labels.
fig_2 <- mysim %>%
  mutate(
    # Relabel polarization measures; recode_factor() orders levels as listed
    polarization = polarization %>% recode_factor(
      "County-Level" = "County Actual\nPolarization (3)",
      "State-Level" = "State Perceived\nPolarization (2)",
      "US-Level" = "US Perceived\nPolarization (1)"),
    # Spell out the panel titles
    social_capital = social_capital %>% recode_factor(
      "Bonding (Social Trust Index)" = "Bonding Social Capital (Social Trust Index)",
      "Bridging (Friendship Network Diversity Index)" = "Bridging Social Capital (Friendship Network Diversity Index)",
      "Linking (Trust in Local Government)" = "Linking Social Capital (Trust in Local Government)")) %>%
  # Keep the central 95% of simulated first differences within each scenario
  group_by(model) %>%
  filter(fd > quantile(fd, 0.025),
         fd < quantile(fd, 0.975)) %>%
  ungroup() %>%
  ggplot(mapping = aes(x = polarization, y = fd, color = outcome)) +
  geom_jitter(alpha = 0.2,
              position = position_jitterdodge(seed = 1, dodge.width = 1)) +
  # Reference line at no change
  geom_hline(yintercept = 0, linetype = "dashed", color = "black") +
  # Violins with a line at the median
  geom_violin(draw_quantiles = 0.5, fill = "white") +
  facet_wrap(~social_capital, ncol = 1) +
  coord_flip() +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.position = "bottom",
        plot.subtitle = element_text(hjust = 0.5)) +
  scale_color_grey(start = 0, end = 0.7) +
  labs(x = "Measure of Individual-Level Political Polarization\n(by Measure No. in Table 1)",
       y = "Expected Change in Days of Poor Health per Month",
       color = "Outcome",
       subtitle = "Effect by Type of Individual-Level Social Capital")

# Save with an explicit `plot =`. Chaining `+ ggsave()` onto a plot relied on
# ggsave() returning NULL and fails in current ggplot2 releases.
ggsave("viz/fig_2.png", plot = fig_2, dpi = 500, width = 6.5, height = 7)

# Print so the figure still appears in the notebook output
fig_2
```

### Tabulate

```{r}
# Summarize the simulated first differences into one table cell per scenario:
# median, 95% interval, and a star when the interval excludes zero.
mysim <- read_rds("fd.rds") %>%
  mutate(outcome = factor(outcome, levels = c("Physical Health", "Mental Health"))) %>%
  # Markdown/HTML row labels (rendered by ggtext in the plot that follows);
  # recode_factor() orders levels as listed
  mutate(social_capital = social_capital %>% recode_factor(
    "Linking (Trust in Local Government)" = "<b>Linking</b><br>(Trust in Local<br>Government)",
    "Bridging (Friendship Network Diversity Index)" = "<b>Bridging</b><br>(Friendship Network<br>Diversity Index)",
    "Bonding (Social Trust Index)" = "<b>Bonding</b><br>(Social Trust<br>Index)")) %>%
  # "US-Level" -> "US", etc.
  mutate(polarization = polarization %>% str_remove("[-]Level")) %>%
  # Median and 95% interval of the first differences for each scenario
  group_by(model, polarization, outcome, social_capital) %>%
  summarize(
    median = median(fd),
    lower = quantile(fd, 0.025),
    upper = quantile(fd, 0.975)) %>%
  ungroup() %>%
  # Star the estimate when the interval lies entirely above or below zero.
  # An interval that touches zero gets no star (previously it produced NA,
  # which leaked into the label as the text "NA").
  mutate(sig = if_else(lower > 0 | upper < 0, "*", "")) %>%
  # Round for display; passing round() directly avoids the deprecated funs()
  mutate_at(vars(median, lower, upper), round, digits = 2) %>%
  mutate(label = paste0(median, sig, "\n(",
                        lower, " to ",
                        upper, ")"))
library(ggtext)
# Render the summary as a table-like figure: one tile per
# (polarization, social capital) cell, one panel per outcome.
table_c1 <- mysim %>%
  ggplot(mapping = aes(x = polarization, y = social_capital,
                       label = label)) +
  geom_tile(color = "darkgrey", fill = "white") +
  geom_text() +
  facet_wrap(~outcome, ncol = 2) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(fill = NA, color = "black"),
        legend.position = "bottom",
        # Row labels contain <b>/<br> markup, so render them with ggtext
        axis.text.y = element_markdown(size = 10),
        plot.subtitle = element_text(hjust = 0.5)) +
  labs(x = "Level Political Polarization",
       y = "Type of Social Capital",
       subtitle = "Expected Change in Days of Poor Health per Month")

# Save with an explicit `plot =`. Chaining `+ ggsave()` onto a plot relied on
# ggsave() returning NULL and fails in current ggplot2 releases.
ggsave("viz/table_c1.png", plot = table_c1, dpi = 500, width = 9, height = 3.5)

# Print so the figure still appears in the notebook output
table_c1
```







# 6. County Models


## 6.1 Simple Models

```{r}

# County-level data; the fitted models are read per imputation via
# zelig.out$z.out below.
dat <- read_rds("county_dataset_mi.rds")
# NOTE(review): an earlier version of this comment said "log-OLS"; the calls
# below fit Zelig gamma models (model = "gamma").

# County Level Models
# m1: days of poor physical health, main effects only, with state as a
# covariate (fixed effects)
m1 <- dat %>%  
  zelig(formula = days_poor_physical_health ~  
        partisan_gap_2016 +
        bonding + bridging + linking + state, 
        model = "gamma")
# Even after adding state fixed effects, the VIF is still quite low, always below 10
# Takes the fitted model from the first imputation, then squares column 3 of
# car::vif()'s output (GVIF^(1/(2*Df)) when vif() returns its matrix form) so
# the values are on the usual VIF scale.
m1$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2

# We need to use a gamma because the outcome is positive only (no negative values possible)
# doing so GREATLY reduces the size of residuals.
#data.frame(residuals = m1$zelig.out$z.out[[1]]$residuals) %>%
#  ggplot(mapping = aes(x = residuals)) +
#  geom_histogram()


# NOTE(review): the remark below mentions pop_never_married, which is not a
# term in these simple models; it appears to be carried over from the
# specification with controls.
# The highest VIF is pop_never_married at 5.5, but when removed from the model, 
# it does not change the results
# m1 %>% from_zelig_model() %>% car::vif()

# m2: as m1, plus polarization x social capital interactions
m2 <- dat %>%
  zelig(formula = days_poor_physical_health ~  
        partisan_gap_2016 * bonding +
        partisan_gap_2016 * bridging +
        partisan_gap_2016 * linking +state,
     model = "gamma")


# County Level Models
# m3: frequent physical distress, main effects only
m3 <- dat %>%  
  zelig(formula = frequent_phys_distress ~  
        partisan_gap_2016 +
        bonding + bridging + linking + state, 
        model = "gamma")
# Same VIF check as for m1 (first imputation only)
m3$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2

# NOTE(review): pop_never_married is not a term in this model either.
# The highest VIF is pop_never_married at 5.5, but when removed from the model, 
# it does not change the results

# m4: as m3, plus polarization x social capital interactions
m4 <- dat %>%
  zelig(formula = frequent_phys_distress ~  
        partisan_gap_2016 * bonding +
        partisan_gap_2016 * bridging +
        partisan_gap_2016 * linking + state,
     model = "gamma")



# County Level Models
# m5: days of poor mental health, main effects only, with state as a covariate
m5 <- dat %>%
  zelig(formula = days_poor_mental_health ~  
        partisan_gap_2016 +
        bonding + bridging + linking + state,
     model = "gamma")
#m5$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2


# m6: as m5, plus polarization x social capital interactions
m6 <- dat %>%
  zelig(formula = days_poor_mental_health ~  
        partisan_gap_2016 * bonding +
        partisan_gap_2016 * bridging +
        partisan_gap_2016 * linking + state,
     model = "gamma")

# County Level Models
# m7: frequent mental distress, main effects only
m7 <- dat %>%
  zelig(formula = frequent_ment_distress ~  
        partisan_gap_2016 +
        bonding + bridging + linking + state,
     model = "gamma")
# VIF check on the first imputation's fit: column 3 of car::vif(), squared
m7$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2

# m8: as m7, plus polarization x social capital interactions
m8 <- dat %>%
  zelig(formula = frequent_ment_distress ~  
        partisan_gap_2016 * bonding +
        partisan_gap_2016 * bridging +
        partisan_gap_2016 * linking + state,
     model = "gamma")


library(texreg)

# Coefficient name -> display label for the regression table. Only the terms
# listed here are passed on as the table's coefficient map.
labels <- as.list(c(
  "partisan_gap_2016" = "Polarization",
  "bonding" = "Bonding Social Capital",
  "bridging" = "Bridging Social Capital",
  "linking" = "Linking Social Capital",
  "partisan_gap_2016:bonding" = "Polarization x Bonding Social Capital",
  "partisan_gap_2016:bridging" = "Polarization x Bridging Social Capital",
  "partisan_gap_2016:linking" = "Polarization x Linking Social Capital"
))

#dat$imputations$imp1$urban %>% unique()

# Worst-case mean VIF for a Zelig model fit on multiply imputed data.
# For each imputation's fitted model, square column 3 of car::vif() and
# average over terms; return the largest of these averages.
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_vif_per_fit <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_vif_per_fit))
}

# Most conservative Nagelkerke R2 for a Zelig model fit on multiply imputed
# data: compute it for each imputation's fitted model and return the minimum.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_by_fit <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_by_fit)))
}

# Smallest number of observations used across the imputation-specific fits
# of a Zelig model, as reported by broom::glance().
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_by_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_by_fit)))
}

# Produce table B9: the eight simple county models side by side, written to
# an HTML file. Collect the models once so the table columns and the
# goodness-of-fit rows are guaranteed to use the same list.
county_models <- list(m1, m2, m3, m4, m5, m6, m7, m8)

texreg::htmlreg(
  county_models,
  file = "viz/table_B9.html",
  caption.above = TRUE,
  caption = "<b>Gamma Models of Physical and Mental Health Outcomes per County (n = 3142)</b><br><i>with Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  single.row = TRUE,
  # Two columns (basic, interactions) per outcome
  custom.header = list("Days of Poor Physical Health" = 1:2,
                       "(%) Frequent Physical Distress" = 3:4,
                       "Days of Poor Mental Health" = 5:6,
                       "(%) Frequent Mental Distress" = 7:8),
  custom.model.names = c("Basic", "Interactions", "Basic", "Interactions",
                         "Basic", "Interactions", "Basic", "Interactions"),
  # Show only the labelled terms (state effects are omitted from the table)
  custom.coef.map = labels,
  groups = list("<b>Independent Variables</b>" = 1,
                "<b>County Social Capital</b>" = 2:4,
                "<b>County Interactions</b>" = 5:7),
  bold = 0.10,
  stars = c(0.001, 0.01, 0.05, 0.10),
  include.nobs = FALSE,
  # Extra fit statistics, one value per model
  custom.gof.rows = list(
    "Mean VIF" = unlist(map(county_models, function(model) get_gvif(model))),
    "Nagelkerke's R2" = unlist(map(county_models, function(model) get_r2(model)))
  )
)
```

## 6.2 With Controls

```{r}

# County-level data; the fitted models are read per imputation via
# zelig.out$z.out below.
dat <- read_rds("county_dataset_mi.rds")
# NOTE(review): an earlier version of this comment said "log-OLS"; the calls
# below fit Zelig gamma models (model = "gamma").

# County Level Models
# m1: days of poor physical health, main effects plus county controls,
# with urban and state as covariates
m1 <- dat %>%  
  zelig(formula = days_poor_physical_health ~  
        partisan_gap_2016 +
        bonding + bridging + linking +
        obesity + republican_2016 + 
        median_age + 
        pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
        median_household_income + 
        pop_female +
        pop_some_college +
        pop_never_married + 
        pop_unemployed +
        protestant + catholic + jewish + muslim + other_adherents + uninsured +
          urban + state, 
        model = "gamma")
# Even after adding state fixed effects, the VIF is still quite low, always below 10
# Takes the fitted model from the first imputation, then squares column 3 of
# car::vif()'s output (GVIF^(1/(2*Df)) when vif() returns its matrix form) so
# the values are on the usual VIF scale.
m1$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2

# We need to use a gamma because the outcome is positive only (no negative values possible)
# doing so GREATLY reduces the size of residuals.
#data.frame(residuals = m1$zelig.out$z.out[[1]]$residuals) %>%
#  ggplot(mapping = aes(x = residuals)) +
#  geom_histogram()


# The highest VIF is pop_never_married at 5.5, but when removed from the model, 
# it does not change the results
# m1 %>% from_zelig_model() %>% car::vif()

# m2: as m1, plus polarization x social capital interactions
m2 <- dat %>%
  zelig(formula = days_poor_physical_health ~  
        partisan_gap_2016 * bonding +
        partisan_gap_2016 * bridging +
        partisan_gap_2016 * linking +
        obesity + republican_2016 + 
        median_age + 
        pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
        median_household_income + 
        pop_female +
        pop_some_college +
        pop_never_married + 
        pop_unemployed +
        protestant + catholic + jewish + muslim + other_adherents + uninsured +
          urban + state,
     model = "gamma")


# County Level Models
# m3: frequent physical distress, main effects plus county controls
m3 <- dat %>%  
  zelig(formula = frequent_phys_distress ~  
        partisan_gap_2016 +
        bonding + bridging + linking +
        obesity + republican_2016 + 
        median_age + 
        pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
        median_household_income + 
        pop_female +
        pop_some_college +
        pop_never_married + 
        pop_unemployed +
        protestant + catholic + jewish + muslim + other_adherents + uninsured + 
          urban + state, 
        model = "gamma")
# Same VIF check as for m1 (first imputation only)
m3$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2

# The highest VIF is pop_never_married at 5.5, but when removed from the model, 
# it does not change the results

# m4: as m3, plus polarization x social capital interactions
m4 <- dat %>%
  zelig(formula = frequent_phys_distress ~  
        partisan_gap_2016 * bonding +
        partisan_gap_2016 * bridging +
        partisan_gap_2016 * linking +
        obesity + republican_2016 + 
        median_age + 
        pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
        median_household_income + 
        pop_female +
        pop_some_college +
        pop_never_married + 
        pop_unemployed +
        protestant + catholic + jewish + muslim + other_adherents + uninsured +
          urban + state,
     model = "gamma")



# County-level gamma model, basic specification (no interaction terms).
# Outcome: days_poor_mental_health.
m5 <- dat %>%
  zelig(
    formula = days_poor_mental_health ~
      # Polarization and the three social capital measures
      partisan_gap_2016 + bonding + bridging + linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )
# Optional collinearity check (disabled), same form as the one used for m3/m7:
# m5$zelig.out$z.out[[1]] %>% car::vif() %>% .[,3] %>% .^2


# Same outcome as m5 (days_poor_mental_health), but polarization is interacted
# with each of the three social capital measures.
m6 <- dat %>%
  zelig(
    formula = days_poor_mental_health ~
      partisan_gap_2016 * bonding +
      partisan_gap_2016 * bridging +
      partisan_gap_2016 * linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

# County-level gamma model, basic specification (no interaction terms).
# Outcome: frequent_ment_distress.
m7 <- dat %>%
  zelig(
    formula = frequent_ment_distress ~
      # Polarization and the three social capital measures
      partisan_gap_2016 + bonding + bridging + linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

# Collinearity check on the first fitted model stored in the Zelig object:
# take the third column of car::vif()'s output and square it.
car::vif(m7$zelig.out$z.out[[1]])[, 3]^2

# Same outcome as m7 (frequent_ment_distress), but polarization is interacted
# with each of the three social capital measures.
m8 <- dat %>%
  zelig(
    formula = frequent_ment_distress ~
      partisan_gap_2016 * bonding +
      partisan_gap_2016 * bridging +
      partisan_gap_2016 * linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

library(texreg)

# Coefficient name -> display label, passed to texreg as `custom.coef.map`.
# The order of the entries here is the row order used by the `groups`
# indices in the table call.
# NOTE(review): this map has a `democrat_2016` entry, but the models visible
# in this file use `republican_2016`. Presumably that coefficient therefore
# never appears in the table -- confirm whether that is intended before
# touching the `groups` row indices.
labels <- list(
  # Polarization and social capital
  partisan_gap_2016 = "Polarization",
  bonding = "Bonding Social Capital",
  bridging = "Bridging Social Capital",
  linking = "Linking Social Capital",
  # Interaction terms
  `partisan_gap_2016:bonding` = "Polarization x Bonding Social Capital",
  `partisan_gap_2016:bridging` = "Polarization x Bridging Social Capital",
  `partisan_gap_2016:linking` = "Polarization x Linking Social Capital",
  # Controls
  obesity = "% Obese",
  democrat_2016 = "% Voted Democrat",
  median_age = "Median Age",
  pop_female = "% Women",
  median_household_income = "Median Household Income",
  pop_black = "% Black",
  pop_hisplat = "% Hispanic / Latino (log)",
  pop_asian = "% Asian",
  pop_natam = "% Native American",
  pop_pacific = "% Hawaiian /Pacific Islander",
  pop_some_college = "% At least some college",
  pop_never_married = "% Never Married (log)",
  pop_unemployed = "Unemployment Rate",
  protestant = "Protestant per 1000 residents",
  catholic = "Catholics per 1000 residents",
  jewish = "Jews per 1000 residents",
  muslim = "Muslims per 1000 residents",
  other_adherents = "Other religious adherents per 1000 residents",
  uninsured = "% Uninsured",
  # Levels of the `urban` factor
  urbanurbanized_area = "Heavily Urbanized Area",
  urbanurban_cluster = "Urban Cluster"
)

#dat$imputations$imp1$urban %>% unique()

# Summarise collinearity for a fitted Zelig model.
#
# For every fit stored in `myzeligmodel$zelig.out$z.out`, squares the third
# column of car::vif() and averages it over terms; returns the largest of
# those per-fit averages (a single number).
get_gvif <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  mean_by_fit <- map(fits, function(fit) mean(car::vif(fit)[, 3]^2))
  max(unlist(mean_by_fit))
}

# Nagelkerke's R2 for a fitted Zelig model.
#
# Computes performance::r2_nagelkerke() for every fit stored in
# `myzeligmodel$zelig.out$z.out` and returns the smallest value found.
get_r2 <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  r2_by_fit <- map_dfr(fits, function(fit) performance::r2_nagelkerke(fit))
  min(unname(unlist(r2_by_fit)))
}

# Number of observations used by a fitted Zelig model.
#
# Reads `nobs` from broom::glance() for every fit stored in
# `myzeligmodel$zelig.out$z.out` and returns the smallest count.
get_obs <- function(myzeligmodel) {
  fits <- myzeligmodel$zelig.out$z.out
  nobs_by_fit <- map(fits, function(fit) broom::glance(fit)$nobs)
  min(unname(unlist(nobs_by_fit)))
}

# Write the eight county-level models (m1-m8) to an HTML regression table.
# Columns come in Basic / Interactions pairs, one pair per health outcome.
texreg::htmlreg(
  list(m1, m2, m3, m4, m5, m6, m7, m8),
  file = "viz/table_B10.html",
  # Caption and column headers
  caption = "<b>Gamma Models of Physical and Mental Health Outcomes per County (n = 3142)</b><br><i>with Fixed Effects by State (n = 51, including DC) and Multiple Imputation</i>",
  caption.above = TRUE,
  custom.header = list(
    "Days of Poor Physical Health" = 1:2,
    "(%) Frequent Physical Distress" = 3:4,
    "Days of Poor Mental Health" = 5:6,
    "(%) Frequent Mental Distress" = 7:8
  ),
  custom.model.names = rep(c("Basic", "Interactions"), times = 4),
  # Rows: only coefficients named in `labels` are shown, in that order;
  # `groups` indexes the resulting rows.
  custom.coef.map = labels,
  groups = list(
    "<b>Independent Variables</b>" = 1,
    "<b>County Social Capital</b>" = 2:4,
    "<b>County Interactions</b>" = 5:7,
    "<b>Basic Controls</b>" = 8:17,
    "<b>Extended Controls</b>" = 18:27
  ),
  single.row = TRUE,
  # Significance markers
  stars = c(0.001, 0.01, 0.05, 0.10),
  bold = 0.10,
  # Goodness-of-fit rows: replace the default N with the custom summaries
  include.nobs = FALSE,
  custom.gof.rows = list(
    "Mean VIF" = unlist(map(list(m1, m2, m3, m4, m5, m6, m7, m8), get_gvif)),
    "Nagelkerke's R2" = unlist(map(list(m1, m2, m3, m4, m5, m6, m7, m8), get_r2))
  )
)
```

## 6.3 Simulation

### Bonding
```{r}
# Reload the county dataset and refit the two interaction models used for
# the simulations below.
dat <- read_rds("county_dataset_mi.rds")

# m1: days of poor physical health
m1 <- dat %>%
  zelig(
    formula = days_poor_physical_health ~
      partisan_gap_2016 * bonding +
      partisan_gap_2016 * bridging +
      partisan_gap_2016 * linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

# m2: days of poor mental health, same right-hand side as m1
m2 <- dat %>%
  zelig(
    formula = days_poor_mental_health ~
      partisan_gap_2016 * bonding +
      partisan_gap_2016 * bridging +
      partisan_gap_2016 * linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

library(Zelig)
# First for Physical Health

# Simulate expected values from m1 (physical health) over 50 evenly spaced
# polarization values between 0 and 1, with bonding set to 0 and to 1.
mysim <- m1 %>%
  setx(partisan_gap_2016 = seq(0, 1, length.out = 50), bonding = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value, bonding, partisan_gap_2016)

# For each interval width, summarise the simulated expected values per
# (bonding, polarization) cell: the median plus lower/upper quantiles.
# `type` labels the interval ("999" stands for 99.9%).
start <- list(
  list(type = "90", lo = 0.05, hi = 0.95),
  list(type = "95", lo = 0.025, hi = 0.975),
  list(type = "99", lo = 0.005, hi = 0.995),
  list(type = "999", lo = 0.0005, hi = 0.9995)
) %>%
  map_dfr(function(ci) {
    mysim %>%
      group_by(bonding, partisan_gap_2016) %>%
      summarize(median = median(ev),
                lower = quantile(ev, ci$lo),
                upper = quantile(ev, ci$hi),
                type = ci$type)
  })

# One row per cell, with columns lower_90 ... lower_999 and
# upper_90 ... upper_999.
middle <- pivot_wider(
  select(start, -median),
  id_cols = c(bonding, partisan_gap_2016),
  names_from = type,
  values_from = c(lower, upper)
)

# Build the ribbon bands for the physical-health simulation.
# Each band spans two adjacent quantiles: the central band covers the 90%
# interval, and each further band adds the slice between one interval and the
# next wider one, on the lower and upper side separately.
viza <- bind_rows(
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_999, upper = lower_99,
              level = "99.9", type = "lower"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_99, upper = lower_95,
              level = "99", type = "lower"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_95, upper = lower_90,
              level = "95", type = "lower"),
  # Central band (lower_90 to upper_90); tagged "upper" so that
  # paste(level, type) stays unique per band.
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_90, upper = upper_90,
              level = "90", type = "upper"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = upper_90, upper = upper_95,
              level = "95", type = "upper"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = upper_95, upper = upper_99,
              level = "99", type = "upper"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = upper_99, upper = upper_999,
              level = "99.9", type = "upper")) %>%
  # Join back in the median. `start` holds one row per interval type for each
  # (bonding, polarization) cell, all with the same median, so it is
  # de-duplicated first; otherwise every band row is repeated once per type.
  left_join(by = c("bonding", "partisan_gap_2016"),
            y = start %>%
              select(bonding, partisan_gap_2016, median) %>%
              distinct()) %>%
  # Drop any grouping carried over from summarize(), so that `bonding` can be
  # recoded later without being treated as a grouping column.
  ungroup()



# Now let's repeat for Mental Health
library(Zelig)

# Simulate expected values from m2 (mental health) over 50 evenly spaced
# polarization values between 0 and 1, with bonding set to 0 and to 1.
mysim <- m2 %>%
  setx(partisan_gap_2016 = seq(0, 1, length.out = 50), bonding = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value, bonding, partisan_gap_2016)

# For each interval width, summarise the simulated expected values per
# (bonding, polarization) cell: the median plus lower/upper quantiles.
# `type` labels the interval ("999" stands for 99.9%).
start <- list(
  list(type = "90", lo = 0.05, hi = 0.95),
  list(type = "95", lo = 0.025, hi = 0.975),
  list(type = "99", lo = 0.005, hi = 0.995),
  list(type = "999", lo = 0.0005, hi = 0.9995)
) %>%
  map_dfr(function(ci) {
    mysim %>%
      group_by(bonding, partisan_gap_2016) %>%
      summarize(median = median(ev),
                lower = quantile(ev, ci$lo),
                upper = quantile(ev, ci$hi),
                type = ci$type)
  })

# One row per cell, with columns lower_90 ... lower_999 and
# upper_90 ... upper_999.
middle <- pivot_wider(
  select(start, -median),
  id_cols = c(bonding, partisan_gap_2016),
  names_from = type,
  values_from = c(lower, upper)
)

# Build the ribbon bands for the mental-health simulation.
# Each band spans two adjacent quantiles: the central band covers the 90%
# interval, and each further band adds the slice between one interval and the
# next wider one, on the lower and upper side separately.
vizb <- bind_rows(
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_999, upper = lower_99,
              level = "99.9", type = "lower"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_99, upper = lower_95,
              level = "99", type = "lower"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_95, upper = lower_90,
              level = "95", type = "lower"),
  # Central band (lower_90 to upper_90); tagged "upper" so that
  # paste(level, type) stays unique per band.
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = lower_90, upper = upper_90,
              level = "90", type = "upper"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = upper_90, upper = upper_95,
              level = "95", type = "upper"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = upper_95, upper = upper_99,
              level = "99", type = "upper"),
  middle %>%
    transmute(bonding, partisan_gap_2016,
              lower = upper_99, upper = upper_999,
              level = "99.9", type = "upper")) %>%
  # Join back in the median. `start` holds one row per interval type for each
  # (bonding, polarization) cell, all with the same median, so it is
  # de-duplicated first; otherwise every band row is repeated once per type.
  left_join(by = c("bonding", "partisan_gap_2016"),
            y = start %>%
              select(bonding, partisan_gap_2016, median) %>%
              distinct()) %>%
  # Drop any grouping carried over from summarize(), so that `bonding` can be
  # recoded later without being treated as a grouping column.
  ungroup()

# Stack the physical (viza) and mental (vizb) bands; `.id` records which
# input each row came from ("1" or "2") and is relabelled as the outcome.
viz <- bind_rows(viza, vizb, .id = "outcome") %>%
  mutate(
    outcome = recode_factor(
      outcome,
      "1" = "Physical\nHealth",
      "2" = "Mental\nHealth"
    ),
    # Facet labels for the two bonding scenarios
    bonding = dplyr::recode_factor(
      factor(bonding),
      "0" = "Low Bonding\nSocial Capital (0)",
      "1" = "High Bonding\nSocial Capital (1)"
    ),
    # Legend labels, ordered from widest to narrowest interval
    model = dplyr::recode_factor(
      factor(level),
      "99.9" = "99.9%",
      "99" = "99%",
      "95" = "95%",
      "90" = "90%"
    ),
    # One ribbon per (interval, side) combination
    group = paste(level, type)
  )


# Keep only the mental-health simulations; reused later for the combined
# figure.
vizbonding <- viz %>%
  filter(outcome == "Mental\nHealth")

# Nested ribbons (one per confidence band) around the simulated median,
# faceted by low vs. high bonding social capital.
fig_bonding <- vizbonding %>%
  ggplot(mapping = aes(x = partisan_gap_2016 * 100, y = median,
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(~bonding, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "County Polarization\n(Gap in Democrat vs. Republican Presidential Vote (%) in 2016)",
       y = "Expected Days of Poor Health",
       fill = "Confidence\nInterval")

# Display the figure in the notebook
fig_bonding

# Save with an explicit `plot =`. Adding ggsave() to a plot with `+` is
# rejected by recent ggplot2 releases and otherwise depends on last_plot().
ggsave("viz/fig_bonding_county_zelig.png", plot = fig_bonding,
       dpi = 500, width = 7.5, height = 6.5)
```

### Bridging

```{r}
# Reload the county dataset and refit the two interaction models used for
# the bridging simulations below.
dat <- read_rds("county_dataset_mi.rds")

# m1: days of poor physical health
m1 <- dat %>%
  zelig(
    formula = days_poor_physical_health ~
      partisan_gap_2016 * bonding +
      partisan_gap_2016 * bridging +
      partisan_gap_2016 * linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

# m2: days of poor mental health, same right-hand side as m1
m2 <- dat %>%
  zelig(
    formula = days_poor_mental_health ~
      partisan_gap_2016 * bonding +
      partisan_gap_2016 * bridging +
      partisan_gap_2016 * linking +
      # County controls
      obesity + republican_2016 + median_age +
      pop_black + pop_hisplat + pop_asian + pop_natam + pop_pacific +
      median_household_income + pop_female + pop_some_college +
      pop_never_married + pop_unemployed +
      protestant + catholic + jewish + muslim + other_adherents + uninsured +
      # Urbanization category and state indicators
      urban + state,
    model = "gamma"
  )

library(Zelig)
# First for Physical Health

# Simulate expected values from m1 (physical health) over 50 evenly spaced
# polarization values between 0 and 1, with bridging set to 0 and to 1.
mysim <- m1 %>%
  setx(partisan_gap_2016 = seq(0, 1, length.out = 50), bridging = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value, bridging, partisan_gap_2016)

# For each interval width, summarise the simulated expected values per
# (bridging, polarization) cell: the median plus lower/upper quantiles.
# `type` labels the interval ("999" stands for 99.9%).
start <- list(
  list(type = "90", lo = 0.05, hi = 0.95),
  list(type = "95", lo = 0.025, hi = 0.975),
  list(type = "99", lo = 0.005, hi = 0.995),
  list(type = "999", lo = 0.0005, hi = 0.9995)
) %>%
  map_dfr(function(ci) {
    mysim %>%
      group_by(bridging, partisan_gap_2016) %>%
      summarize(median = median(ev),
                lower = quantile(ev, ci$lo),
                upper = quantile(ev, ci$hi),
                type = ci$type)
  })

# One row per cell, with columns lower_90 ... lower_999 and
# upper_90 ... upper_999.
middle <- pivot_wider(
  select(start, -median),
  id_cols = c(bridging, partisan_gap_2016),
  names_from = type,
  values_from = c(lower, upper)
)

# Build the ribbon bands for the physical-health simulation.
# Each band spans two adjacent quantiles: the central band covers the 90%
# interval, and each further band adds the slice between one interval and the
# next wider one, on the lower and upper side separately.
viza <- bind_rows(
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_999, upper = lower_99,
              level = "99.9", type = "lower"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_99, upper = lower_95,
              level = "99", type = "lower"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_95, upper = lower_90,
              level = "95", type = "lower"),
  # Central band (lower_90 to upper_90); tagged "upper" so that
  # paste(level, type) stays unique per band.
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_90, upper = upper_90,
              level = "90", type = "upper"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = upper_90, upper = upper_95,
              level = "95", type = "upper"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = upper_95, upper = upper_99,
              level = "99", type = "upper"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = upper_99, upper = upper_999,
              level = "99.9", type = "upper")) %>%
  # Join back in the median. `start` holds one row per interval type for each
  # (bridging, polarization) cell, all with the same median, so it is
  # de-duplicated first; otherwise every band row is repeated once per type.
  left_join(by = c("bridging", "partisan_gap_2016"),
            y = start %>%
              select(bridging, partisan_gap_2016, median) %>%
              distinct()) %>%
  # Drop any grouping carried over from summarize(), so that `bridging` can
  # be recoded later without being treated as a grouping column.
  ungroup()



# Now let's repeat for Mental Health
library(Zelig)

# Simulate expected values from m2 (mental health) over 50 evenly spaced
# polarization values between 0 and 1, with bridging set to 0 and to 1.
mysim <- m2 %>%
  setx(partisan_gap_2016 = seq(0, 1, length.out = 50), bridging = c(0, 1)) %>%
  sim() %>%
  Zelig::zelig_qi_to_df() %>%
  select(ev = expected_value, bridging, partisan_gap_2016)

# For each interval width, summarise the simulated expected values per
# (bridging, polarization) cell: the median plus lower/upper quantiles.
# `type` labels the interval ("999" stands for 99.9%).
start <- list(
  list(type = "90", lo = 0.05, hi = 0.95),
  list(type = "95", lo = 0.025, hi = 0.975),
  list(type = "99", lo = 0.005, hi = 0.995),
  list(type = "999", lo = 0.0005, hi = 0.9995)
) %>%
  map_dfr(function(ci) {
    mysim %>%
      group_by(bridging, partisan_gap_2016) %>%
      summarize(median = median(ev),
                lower = quantile(ev, ci$lo),
                upper = quantile(ev, ci$hi),
                type = ci$type)
  })

# One row per cell, with columns lower_90 ... lower_999 and
# upper_90 ... upper_999.
middle <- pivot_wider(
  select(start, -median),
  id_cols = c(bridging, partisan_gap_2016),
  names_from = type,
  values_from = c(lower, upper)
)

# Build the ribbon bands for the mental-health simulation.
# Each band spans two adjacent quantiles: the central band covers the 90%
# interval, and each further band adds the slice between one interval and the
# next wider one, on the lower and upper side separately.
vizb <- bind_rows(
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_999, upper = lower_99,
              level = "99.9", type = "lower"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_99, upper = lower_95,
              level = "99", type = "lower"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_95, upper = lower_90,
              level = "95", type = "lower"),
  # Central band (lower_90 to upper_90); tagged "upper" so that
  # paste(level, type) stays unique per band.
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = lower_90, upper = upper_90,
              level = "90", type = "upper"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = upper_90, upper = upper_95,
              level = "95", type = "upper"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = upper_95, upper = upper_99,
              level = "99", type = "upper"),
  middle %>%
    transmute(bridging, partisan_gap_2016,
              lower = upper_99, upper = upper_999,
              level = "99.9", type = "upper")) %>%
  # Join back in the median. `start` holds one row per interval type for each
  # (bridging, polarization) cell, all with the same median, so it is
  # de-duplicated first; otherwise every band row is repeated once per type.
  left_join(by = c("bridging", "partisan_gap_2016"),
            y = start %>%
              select(bridging, partisan_gap_2016, median) %>%
              distinct()) %>%
  # Drop any grouping carried over from summarize(), so that `bridging` can
  # be recoded later without being treated as a grouping column.
  ungroup()

# Stack the physical (viza) and mental (vizb) bands; `.id` records which
# input each row came from ("1" or "2") and is relabelled as the outcome.
viz <- bind_rows(viza, vizb, .id = "outcome") %>%
  mutate(
    outcome = recode_factor(
      outcome,
      "1" = "Physical\nHealth",
      "2" = "Mental\nHealth"
    ),
    # Facet labels for the two bridging scenarios
    bridging = dplyr::recode_factor(
      factor(bridging),
      "0" = "Low Bridging\nSocial Capital (0)",
      "1" = "High Bridging\nSocial Capital (1)"
    ),
    # Legend labels, ordered from widest to narrowest interval
    model = dplyr::recode_factor(
      factor(level),
      "99.9" = "99.9%",
      "99" = "99%",
      "95" = "95%",
      "90" = "90%"
    ),
    # One ribbon per (interval, side) combination
    group = paste(level, type)
  )


# Keep only the physical-health simulations; reused later for the combined
# figure.
vizbridging <- viz %>%
  filter(outcome == "Physical\nHealth")

# Nested ribbons (one per confidence band) around the simulated median,
# faceted by outcome (rows) and low vs. high bridging social capital (columns).
fig_bridging <- vizbridging %>%
  ggplot(mapping = aes(x = partisan_gap_2016 * 100, y = median,
                       ymin = lower, ymax = upper,
                       group = group, fill = model)) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(outcome~bridging, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(panel.spacing = unit(0.5, "cm"),
        panel.grid = element_blank(),
        strip.text.y = element_text(angle = 0),
        plot.caption = element_text(hjust = 0.5),
        panel.border = element_rect(color = "black", fill = NA)) +
  labs(x = "County Polarization\n(Gap in Democrat vs. Republican Presidential Vote (%) in 2016)",
       y = "Expected Days of Poor Health",
       fill = "Confidence\nInterval")

# Display the figure in the notebook
fig_bridging

# Save with an explicit `plot =`. Adding ggsave() to a plot with `+` is
# rejected by recent ggplot2 releases and otherwise depends on last_plot().
ggsave("viz/fig_bridging_county_zelig.png", plot = fig_bridging,
       dpi = 500, width = 7.5, height = 6.5)
```

### Combined
```{r}

# Quick look at both simulations in one faceted plot (displayed only, not
# saved). The two social-capital columns are given a common name so the
# data frames can be stacked.
ggplot(
  bind_rows(
    rename(vizbonding, social_capital = bonding),
    rename(vizbridging, social_capital = bridging)
  ),
  aes(x = partisan_gap_2016 * 100, y = median,
      ymin = lower, ymax = upper,
      fill = model, group = group)
) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_wrap(outcome ~ social_capital, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(
    panel.border = element_rect(color = "black", fill = NA),
    panel.grid = element_blank(),
    panel.spacing = unit(0.5, "cm"),
    plot.caption = element_text(hjust = 0.5),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    x = "County Polarization\n(Gap in Democrat vs. Republican Presidential Vote (%) in 2016)",
    y = "Expected Days of Poor Health",
    fill = "Confidence\nInterval"
  )




# Upper panel of the combined figure: mental-health simulation by bonding
# social capital. The x-axis title is left off; the lower panel carries it.
g1 <- ggplot(
  vizbonding,
  aes(x = partisan_gap_2016 * 100, y = median,
      ymin = lower, ymax = upper,
      fill = model, group = group)
) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(. ~ bonding, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(
    panel.border = element_rect(color = "black", fill = NA),
    panel.grid = element_blank(),
    panel.spacing = unit(0.5, "cm"),
    plot.caption = element_text(hjust = 0.5),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    x = NULL,
    y = "Expected Days of\nPoor Mental Health",
    fill = "Confidence\nInterval"
  )


# Lower panel of the combined figure: physical-health simulation by bridging
# social capital. Carries the shared x-axis title.
g2 <- ggplot(
  vizbridging,
  aes(x = partisan_gap_2016 * 100, y = median,
      ymin = lower, ymax = upper,
      fill = model, group = group)
) +
  geom_ribbon(color = "white", size = 0.25) +
  geom_line(color = "white", linetype = "dashed") +
  scale_fill_grey(start = 0.8, end = 0) +
  facet_grid(. ~ bridging, scales = "free_y") +
  theme_classic(base_size = 14) +
  theme(
    panel.border = element_rect(color = "black", fill = NA),
    panel.grid = element_blank(),
    panel.spacing = unit(0.5, "cm"),
    plot.caption = element_text(hjust = 0.5),
    strip.text.y = element_text(angle = 0)
  ) +
  labs(
    x = "County Polarization\n(Gap in Democrat vs. Republican Presidential Vote (%) in 2016)",
    y = "Expected Days of\nPoor Physical Health",
    fill = "Confidence\nInterval"
  )

# Stack the bonding (g1) and bridging (g2) panels with one shared legend.
fig_6 <- ggpubr::ggarrange(g1, g2, ncol = 1,
                           common.legend = TRUE, legend = "right")

# Display the figure in the notebook
fig_6

# Save with an explicit `plot =`. Adding ggsave() to a plot with `+` is
# rejected by recent ggplot2 releases and otherwise depends on last_plot().
ggsave("viz/fig_6.png", plot = fig_6, dpi = 500, width = 7.5, height = 6.5)
```
```{r}
# Clear the simulation intermediates and fitted models from the workspace
rm(viz, viza, vizb, middle, mysim, start, dat,
   m1, m2, m3, m4, m5, m6, m7, m8)
```


# 8. Supplemental Analysis

## Figure 5

```{r}
library(tidyverse)



# First, we're going to visualize the average number of
# days of poor physical and mental health for people with
# high social capital vs. low social capital
dat <- read_rds("dataset.rds")  %>%
  # Keep the two outcomes, the three polarization measures, and the three
  # social capital measures (renamed to bonding / bridging / linking)
  select(
    days_poor_physical_health, days_poor_mental_health,
    diff_self_us_voter_avg, diff_self_state_voter_avg,
    polarized_aggregate,
    bridging = diversity_friendship_network_index,
    bonding = social_trust_index,
    linking = help_from_local_govt) %>%
  # Recode social capital into dichotomous: ntile(., 2) splits each measure
  # into a lower (1) and upper (2) half. A formula lambda is used here because
  # funs() has been deprecated since dplyr 0.8.0.
  mutate_at(vars(bonding, bridging, linking),
            ~ ntile(., 2) %>%
              dplyr::recode_factor(
                "2" = "Above Median",
                "1" = "Below Median")) %>%
  # Pivot longer for easier visualization: one row per respondent and outcome
  pivot_longer(
    cols = c(days_poor_mental_health,
             days_poor_physical_health),
    names_to = "type",
    values_to = "outcome") %>%
  mutate(type = type %>% recode_factor(
    "days_poor_physical_health" = "Physical\nHealth",
    "days_poor_mental_health" = "Mental\nHealth")) %>%
  # Turn the same/different indicator into a number (0 or 10); anything else
  # becomes NA
  mutate(polarized_aggregate = case_when(
    polarized_aggregate == "same" ~ 0,
    polarized_aggregate == "different" ~ 10,
    TRUE ~ NA_real_))

# Stack three copies of `dat`, one per polarization measure. In each copy the
# measure in use is renamed to `polarization` and `level` names the measure.
out <- bind_rows(
  # (1) distance between self and the average US voter
  mutate(
    rename(dat, polarization = diff_self_us_voter_avg),
    level = "US Perceived\nPolarization (1)"
  ),
  # (2) distance between self and the average state voter
  mutate(
    rename(dat, polarization = diff_self_state_voter_avg),
    level = "State Perceived\nPolarization (2)"
  ),
  # (3) county indicator, mapped from its 0 / 10 coding to 0 / 1
  mutate(
    rename(dat, polarization = polarized_aggregate),
    level = "County Actual\nPolarization (3)",
    polarization = case_when(
      polarization == 10 ~ 1,
      polarization == 0 ~ 0,
      TRUE ~ NA_real_
    )
  )
)

# Stack `out` once per social capital measure: the measure in use becomes
# `value` (Above / Below Median) and `social_capital` names it.
viz <- bind_rows(
  mutate(
    rename(out, value = bonding),
    social_capital = "Bonding\n(Social Trust\nIndex)"
  ),
  mutate(
    rename(out, value = bridging),
    social_capital = "Bridging\n(Friendship\nNetwork\nDiversity Index)"
  ),
  mutate(
    rename(out, value = linking),
    social_capital = "Linking\n(Trust in Local\nGovernment)"
  )
) %>%
  # Mean outcome for every combination of polarization measure, social
  # capital group, outcome type, social capital measure, and polarization
  group_by(level, value, type, social_capital, polarization) %>%
  summarize(outcome = mean(outcome, na.rm = TRUE)) %>%
  ungroup() %>%
  # Drop rows whose social capital group is missing
  filter(!is.na(value)) %>%
  # Fix the left-to-right order of the panel columns
  mutate(
    level = factor(
      level,
      levels = c(
        "US Perceived\nPolarization (1)",
        "State Perceived\nPolarization (2)",
        "County Actual\nPolarization (3)"
      )
    )
  )

# The intermediate data frames are no longer needed
rm(dat, out)

# One x scale per panel column, keyed by the `level` label. The two perceived
# measures get breaks at 0, 5 and 10; the county measure at 0 and 1.
scales_x <- list(
  "US Perceived\nPolarization (1)" =
    scale_x_continuous(breaks = c(0, 5, 10)),
  "State Perceived\nPolarization (2)" =
    scale_x_continuous(breaks = c(0, 5, 10)),
  "County Actual\nPolarization (3)" =
    scale_x_continuous(breaks = c(0, 1))
)

# Use this facetscales package to get custom scales

#library(devtools)
#devtools::install_github("zeehio/facetscales")

library(facetscales)


# Left half of Figure 5: physical health. Points are group means of the
# outcome with a fitted line per social capital group; rows are the social
# capital measures and columns the polarization measures.
g1 <- viz %>%
  filter(type == "Physical\nHealth") %>%
  ggplot(mapping = aes(x = polarization, y = outcome,
                       color = value, group = value)) +
  geom_point(size = 2, alpha = 0.75) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_classic(base_size = 14) +
  theme(panel.border = element_rect(color = "black", fill = NA),
        panel.spacing = unit(0.3, "cm"),
        legend.position = "bottom",
        plot.subtitle = element_text(hjust = 0.5),
        plot.background = element_rect(fill = "lightgrey", color = "black"),
        # Row strips are hidden here; the mental-health panel shows them
        strip.text.y = element_blank()) +
  scale_color_grey()  +
  # facet_grid_sc() allows a different x scale per panel column
  facet_grid_sc(rows = vars(social_capital), cols = vars(level),
                scales = list(x = scales_x)) +
  labs(
    x = NULL,
    # `outcome` is computed with mean(), so the axis is labelled as a mean
    # rather than a median
    y = "Mean Days of Poor Health",
    subtitle = "Physical Health",
    color = "Individual-Level Social Capital")

# Right half of Figure 5: mental health. Same layout as the physical-health
# panel, but the y-axis text, ticks and title are dropped and the row strips
# (social capital measure) are shown.
g2 <- ggplot(
  filter(viz, type == "Mental\nHealth"),
  aes(x = polarization, y = outcome, color = value, group = value)
) +
  geom_point(size = 2, alpha = 0.75) +
  geom_smooth(method = "lm", se = FALSE) +
  theme_classic(base_size = 14) +
  theme(
    axis.text.y = element_blank(),
    axis.ticks.y = element_blank(),
    legend.position = "bottom",
    panel.border = element_rect(color = "black", fill = NA),
    panel.spacing = unit(0.3, "cm"),
    plot.background = element_rect(fill = "lightgrey", color = "black"),
    plot.subtitle = element_text(hjust = 0.5),
    strip.text.y = element_text(angle = 0)
  ) +
  scale_color_grey() +
  # facet_grid_sc() allows a different x scale per panel column
  facet_grid_sc(
    rows = vars(social_capital),
    cols = vars(level),
    scales = list(x = scales_x)
  ) +
  labs(
    x = NULL,
    y = NULL,
    subtitle = "Mental Health",
    color = "Individual-Level Social Capital"
  )

library(ggpubr)

# Place the physical (g1) and mental (g2) panels side by side with a shared
# legend on top, then add a common x-axis title underneath.
fig_5 <- ggpubr::ggarrange(plotlist = list(g1,g2), common.legend = TRUE, legend = "top",
                           widths = c(1.98, 2.25)) %>%
  ggpubr::annotate_figure(bottom = ggpubr::text_grob("Individual-Level Political Polarization\n(Less Polarized to More Polarized)", color = "black", hjust = 0.5, x = 0.5, vjust = 0.5, size = 16))
# Alternative bottom note (disabled):
#  ggpubr::annotate_figure(bottom = ggpubr::text_grob("Note: Panels left to right depict Polarization Measures 1-3 from Table 1.", color = "black", face = "italic", hjust = 0.5, x = 0.5, vjust = 0.25, size = 12))

# Display the figure in the notebook
fig_5

# Save with an explicit `plot =`. Adding ggsave() to a plot with `+` is
# rejected by recent ggplot2 releases and otherwise depends on last_plot().
ggsave("viz/fig_5.png", plot = fig_5, dpi = 500, width = 10, height = 6)


```




